





aaa
clear


*** Define Variance decomposition functions
{


capture program drop RecoverAlphaGamma
program define RecoverAlphaGamma


local gamma = _b[`2'] / _b[`1']
local alpha = ( 1 - _b[`1'] ) / ( 1 + _b[`1'] * `gamma' )


local alpha %6.2f `alpha'
local gamma %6.2f `gamma'


di ""
di "alpha"
di `alpha'
di ""
di "gamma"
di `gamma'
di ""

end


capture program drop VarianceDecomposition2
program define VarianceDecomposition2

/* Arguments
	1 and 2    : variables in decomposition
	3, 4 and 5 : description of each term: 1 , 2 and covariance
	6 		   : color
	7 		   : name to save it
	8 		   : variable for weights


*/
preserve

if "`8'" == "NoWeights" {

corr `1' `2', co
mat Cov = r(C)
svmat Cov , names("Cov")

}

if "`8'" != "NoWeights" {

corr `1' `2' [ w = `8' ], co
mat Cov = r(C)
svmat Cov , names("Cov")

}

keep *Cov*
keep if _n <= 2
set obs 4

replace Cov1 = 2 * Cov1[2] if _n == 3
replace Cov1 = Cov2[2] if _n == 2

keep Cov1
rename Cov1 var

egen tot = sum(var)
replace var = tot if _n == 4

gen des = ""
replace des = "`3'" if _n == 1
replace des = "`4'" if _n == 2
replace des = "`5'" if _n == 3

replace var = 100 * var / tot
drop tot
gen n = _n

graph hbar var if _n <= 3 , over(des, sort(n)) ///
	  bar(1 , color(`6')) /// bar(2 , color(navy) fintensity(50) lwidth(0) ) ///
	  xsize(8) ysize(5) ///
	  graphregion(color(white)) bgcolor(white) ///
	  ytitle(Percent) ///
	  name(`7', replace)

if "`9'" != "NoSave" {
save "`9'", replace
}	  
	  
restore


end






capture program drop VarianceDecomposition3
program define VarianceDecomposition3

* Order of covariances in title:
	* Cov12 Cov13 Cov23

preserve

if "`12'" == "NoWeights" {

corr `1' `2' `3' , co
mat Cov = r(C)
svmat Cov , names("Cov")

}

if "`12'" != "NoWeights" {

corr `1' `2' `3' [ w = `12' ], co
mat Cov = r(C)
svmat Cov , names("Cov")

}

keep *Cov*
keep if _n <= 3
set obs 7

forvalues i=2(1)3 {
replace Cov1 = 2 * Cov1[`i'] if _n == 2 + `i'
}
replace Cov1 = 2 * Cov2[3] if _n == 6

forvalues i=2(1)3 {
replace Cov1 = Cov`i'[`i'] if _n == `i'
}

keep Cov1
rename Cov1 var

egen tot = sum(var)
replace var = tot if _n == 7

gen des = ""
replace des = "`4'" if _n == 1
replace des = "`5'" if _n == 2
replace des = "`6'" if _n == 3
replace des = "`7'" if _n == 4
replace des = "`8'" if _n == 5
replace des = "`9'" if _n == 6

replace var = 100 * var / tot
drop tot
gen n = _n

graph hbar var if _n <= 6 , over(des, sort(n)) ///
	  bar(1 , color(`10')) /// bar(2 , color(navy) fintensity(50) lwidth(0) ) ///
	  xsize(8) ysize(5) ///
	  graphregion(color(white)) bgcolor(white) ///
	  ytitle(Percent) ///
	  name(`11', replace)
	  
	  
	  
if "`13'" != "NoSave" {
save "`13'", replace
}	  
	  
	  
restore



end


}

*** Command to show all variables
{

capture program drop ShowVariables
program define ShowVariables
	foreach var of varlist *{
	di "`var'" _col(20) "`: var l `var''" _col(50)  "`: val l `var''"
	}
end

}

*** Colors
{

global MyBlue   57 96 173 // 60 106 209 
global MyOrange 213 82 55
global MyDarkOrange 169 74 1
global MyGreen  53 98 26
global MyLightGreen 121 148 103

}

*** Paths
{
cd "XXX/7_statarev/output"
global graphs  XXX/4_graphsrev
global tables  XXX/5_tablesrev
global root    XXX/7_statarev/
global root2   XXX/7_statarev
global matlab  XXX/10_matlabrevQJE/source
global matlab2 XXX/10_matlabrevQJE

}


					*****************************************
					*** DATA CONSTRUCTION : FRANCE: DADS ****
					*****************************************
				
{						

					
*** Create long panel + very basic checks
{

* List of all variables in panel
{

* To show all variables in a dataset

/*
 foreach var of varlist *{
  2. di "`var'" _col(20) "`: var l `var''" _col(50)  "`: val l `var''"
  3. }
*/

/*

depr               D�partement de r�sidence      
dept               D�partement de travail        
an                 Ann�e de la validit�          
comr               Commune de r�sidence (depuis 1988)
comt               Commune de travail (ie l'�tablissement)
ech                Echelon dans le grade, FPE uniquement
min                Minist�re, FPE uniquement     
ser                Service (au sein du minist�re), FPE uniquement
ens                Code Enseignant, FPE uniquement
csk                Cat�gorie socioprofessionnelle avec qualification, FPE uniquement
depnai             D�partement de naissance      
regr               R�gion de r�sidence           
st                 Statut de l'entreprise ou administration (jusqu'en 1992), DADS uniquement
av                 Avantages en nature           
netnet             Salaire NETNET (correspond au S_NET DADS), estim� jusqu'en 1993
age                �ge                           
annai              Ann�e de naissance            
debremu            Date de d�but de r�mun�ration (entre 1 et 360)
finremu            Date de fin de r�mun�ration (entre 1 et 360)
dp                 Dur�e de paie en jours (entre 1 et 360)
dpc                Dur�e de paie en jours, convertie en �quivalent temps plein, FPE uniquement
sir                Num�ro siren de l'entreprise, DADS uniquement
ce                 Condition d'emploi            
cs1                CS � 1 chiffre                
sx                 Indicatrice de sexe=masculin  
filtre             Indicatrice de poste non annexe
grade              Grade ou emploi, FPE uniquement
sb                 Salaire brut fiscal (depuis 1993, estim� avant 1993)
sn                 Salaire net fiscal            
cat                Cat�gorie d'agents            
statut             Statut (titulaire, non titulaire, stagiaire), FPE uniquement
regt               R�gion de travail             
bascsg             Base CSG (depuis 1999)        
a38                Activit� agr�g�e � partir de 1994
ape40              Activit� agr�g�e (jusqu'en 1992)
apen               Activit� �conomique de l'entreprise (jusqu'en 2008)
apet               Activit� �conomique de l'�tablissement (jusqu'en 2008)
apen2              Activit� �conomique de l'entreprise en NAF rev. 2 (depuis 2008)
apet2              Activit� �conomique de l'�tablissement en NAF rev. 2 (depuis 2008)
catjur             Cat�gorie juridique (depuis 1986), DADS uniquement
contrat_travail    Type de contrat de travail (depuis 2005), DADS uniquement
conv_coll          convention collective (depuis 2005), DADS uniquement
cs1_anc            CS � 1 chiffres jusqu�en 1982 inclus
cs2                CS � 2 chiffres               
cs2_anc            CS � 2 chiffres jusqu�en 1982 inclus
domempl            Domaine d'emploi (depuis 1986)
msb_ent            Masse des salaires bruts fiscaux dans l'entreprise
nbheur             Nombre d'heures r�mun�r�es (depuis 1993), DADS uniquement
nblig              Nombre de lignes �tablissement regroup�es dans l'observation entreprise
nbsa_ent           Nombre de salari�s dans l'entreprise au 31/12
nbsa_et            Nombre de salari�s dans l'�tablissement au 31/12
nes36              Activit� �conomique �tablissement (de 1994 � 2007)
nes36n             Activit� �conomique entreprise (de 1995 � 2001)
nes5               Activit� �conomique �tablissement (sauf en 1993)
netnet_corrcpso    Salaire NETNET 2012 estim� pour prendre en compte les cotisations patronales pou
nic4               NIC �tablissement             
nnifict            Indicatrice de nirs fictifs : INVALIDE: NE PERMET PAS D'ANALYSE DE PANEL
nnihc              Indicatrice de nirs hors champ : PAS NE EN OCTOBRE DE LA BONNE ANNEE: JUSTE AJOUTES CHAQUE ANNEE POUR COMPLETER LA TAILLE DU PANEL AU 1/25 OU 1/12
pcs4               Code PCS � 4 chiffres (depuis 1993), DADS uniquement
pcs_v2             Code PCS-ESE � 4 chiffres selon la m�thode de 2009 (en 2008, priv� uniquement)
pseudosir          Indicatrice de pseudo siret (de 1993 � 2008), DADS uniquement
ptt                Indicatrice de France Telecom et La Poste, DADS uniquement
regn               R�gion de l'entreprise        
sirfict            Indicatrice de siren fictif, DADS uniquement
sect               Secteur d'emploi (Priv�, Administration...)
entpan             Ann�e d'entr�e dans le panel  
nninouv            Identifiant individu          
entsir             Date d'entr�e dans l'entreprise en ann�es
tain               Tranche de taille d'entreprise
tait               Tranche de taille d'�tablissement
netnetr            Salaire NETNET (correspond au S_NET DADS), estim� jusqu'en 1993, en euros consta
netnet_corrcpsor   Salaire NETNET 2012 estim� pour prendre en compte les cotisations patronales pou
sbr                Salaire brut fiscal en euros constants (depuis 1993, estim� avant 1993)
bascsgr            Base CSG en euros constants (depuis 1999)
snr                Salaire net fiscal en euros constants
avr                Avantages en nature en euros constants
msbr_ent           Masse des salaires bruts fiscaux dans l'entreprise en euros constants
pan25              Indicatrice du panel au 25�me (depuis 1976)


*/
}
*

global varliste an annai depr dept comr comt depnai age dp debremu finremu sir sx apen apet cs2 nbsa_ent nbsa_et nic4 pcs4 sect nninouv nnifict nnihc snr avr sbr entsir msbr_ent pan25


use $varliste using "/Users/adrien/Desktop/Perso/Data/DADS/Panel/source/source_stata/pan9401", clear
save "$root/output/temp/pan9401", replace

use $varliste using "/Users/adrien/Desktop/Perso/Data/DADS/Panel/source/source_stata/pan0207", clear
append using "$root/output/temp/pan9401"

save "$root/output/temp/pan9407_00", replace

erase "$root/output/temp/pan9401.dta"

cd "$root/output"

* Renaming, basic checks and cleaning

clear
use "temp/pan9407_00", clear

count // 26,174,523

* Errors in individual identifiers which do not allow for a panel analysis
*tab nnifict

egen Nnifict = nvals(nnifict), by(nninouv)
*tab Nnifict

drop if nnifict == "1" // drop 1.2m
drop Nnifict

* Individuals not born in October of the right year
*tab nnihc

egen Nnihc = nvals(nnihc), by(nninouv)
*tab Nnihc 

*tabstat an if Nnihc == 2 , stat(N) by(an)

drop if Nnihc == 2 // drop 26k
drop if nnihc == "1" // drop 138k
drop nnihc Nnihc

* Put municipalities in 5-digit format
foreach s in r t {
gen Com`s' = dep`s' + com`s' if strlen(dep`s') == 2 & strlen(com`s') == 3
}

order nninouv comr Comr comt Comt
br


foreach s in r t {
drop dep`s' com`s'
rename Com`s' com`s'
}

* Clean age / year of birth
*count // 24,244,274
*su age // 24,242,809 obs
egen MissingAge = max( age == . ), by(nninouv)
drop if MissingAge == 1 // drop 2k
drop MissingAge

* Filter on age
	* First make sure that age is monotonically increasing
gen check = age - an
egen Nc = nvals(check) , by(nninouv)
*tab Nc

drop if Nc == 2 // drop 8k
drop Nc check

* Look at age
* tab age

* Filter age // 15-80
keep if age >= 15 & age <= 80 // drop 100k

* Gender
egen Ns = nvals(sx), by(nninouv)
tab Ns

drop if Ns == 2
drop Ns

count // 24,129,409
* tab sx

* Keep only men
keep if sx == "1" //drop 11m

rename an year
rename depnai birth_dep

rename debremu start
rename finremu end

rename sx sex

rename sir siren

rename nbsa_ent firm_size
rename nbsa_et plant_size

rename nic4 nic

rename pcs4 cs4

* Keep public sector - otherwise may mistake periods of public sector employment as unemployment
rename sect public

rename nninouv id
rename entsir date_join_firm

rename sbr gwc // gross wage in constant euros
rename snr nwc // net wage in constant euros
rename avr bc // benefits in constant euros
rename msbr_ent payrolln // total wages of firm

* Checks for change in sampling in 2002
*tab pan25
*su year if pan25=="0"
*su year if year >= 2002

* Check that pan25 is constant by individual
egen DistinctPan25 = nvals(pan25), by(id)
*tab DistinctPan25
drop if DistinctPan25 == 2 // drop 16k
*drop DistinctPan25
count // 12,908,265
*tab pan25 if pan25 == "" // no obs

* Sort
sort id year
* Clean start and end dates
gen days = end - start + 1

gen check = days - dp
* su check, d

count //  12,908,265
count if days != . //  12,895,826
count if dp !=  . // 12,908,239
count if check > 0 // 681,148

egen MissingSpell = max( (days == . ) | ( dp == . ) ), by(id)
* tab MissingSpell
drop if MissingSpell == 1
drop MissingSpell

* tab year
*tab year if check > 0
*egen DaysIssue = max( check != 0 ) , by(id)	
	
save "dads_0", replace

}

*** More cleaning
{

use "dads_0", clear
order id year start end days dp gwc
* su start end days dp , d

* correct spells with negative start and end (double-check)
su start if end < 0 // max is -1
su year if end < 0 // only 2007

replace year = year - 1 if end < 0
replace start = 360 + start if end < 0
replace end = 360 + end if end < 0

su year // min still 1994
replace start = max(start,1)
replace days = end-start + 1
su days // 1 to 360

count if days == . // 0 obs

gduplicates drop id year siren start end, force // drop 472 more

* Save
save "dads_01", replace

}

*** Put to quarterly level; mimick EEC in selecting a spell in each quarter, with a random reference week
{

use id pan25 using "dads_01", clear
gsort id
bys id: keep if _n == 1
save "temp/IdPan25", replace

use "dads_01", clear

order id year start end days
gsort id year start end

gen q1 =  start <= 90
gen q2 = 2 * ( ( start > 90 & start <= 180 ) | ( start <= 90 & end > 90 ) )
gen q3 = 3 * ( ( start > 180 & start <= 270 ) | ( start <= 180 & end > 180 ) )
gen q4 = 4 * ( end > 270 )

expand 4

gsort id year siren start end
bys id year siren start end: gen Q = _n

order id siren nic year start end Q q*

* Keep only relevant quarter-spells
keep if inlist(Q,q1,q2,q3,q4) // 39m remain

* Define daily and quarterly wage
foreach var in gwc nwc {
gen d`var' = `var' / days
gen q`var' = 90 * d`var'
}

* Define income from each spell in a given quarter
	* Define start and end dates by quarter
gen start1 = max( start , 1   )		if Q == 1
gen start2 = max( start , 91  )		if Q == 2
gen start3 = max( start , 181 )		if Q == 3
gen start4 = max( start , 271 )		if Q == 4

gen end1 = min( end , 90  )		if Q == 1
gen end2 = min( end , 180 )		if Q == 2
gen end3 = min( end , 270 )		if Q == 3
gen end4 = min( end , 360 )		if Q == 4

	* Define number of days in each quarter of the spell
forvalues i=1(1)4 {
gen days`i' = end`i' - start`i' + 1
}

	* Total income in each quarter for each spell
gen Qw = 0
forvalues i=1(1)4 {
replace Qw = dgwc * days`i' if Q == `i'
}

drop start1 start2 start3 start4 end1 end2 end3 end4 days1 days2 days3 days4
drop q1 q2 q3 q4


* Keep only highest paying spell in quarter
* Draw a random reference week in each quarter for each individual	
set seed 27021991
gen ReferenceWeek = floor( 13 * runiform() + 1)
bys id: replace ReferenceWeek = ReferenceWeek[1] if _n >= 2
	* Create a unique reference week per individual to select spells every quarter, as in the EEC

* Find all spells in the reference week
gen StartReference = (Q-1) * 90 + ( Reference - 1 ) * 7 + 1
gen EndReference =  (Q-1) * 90 + min( Reference * 7 , 90 )

* su StartRef EndRef start end

gen IsInReference = ( start <= StartReference & end >= StartReference ) | ///
					( start >= StartReference & start <= EndReference )

su IsIn // mean = 0.9129

* define the reference week only on employment spells
* if I do not see the worker here in the reference week, it will be automatically
* assigned to non-employment below

* Create variable to allow to set quarter to unemployed if more than half is spent non-employed
gen startq = max( start , (Q-1)*90 + 1 )
replace startq = . if start == .

gen endq = min( end , Q*90 )
replace endq = . if end == .				

gen daysq = endq - startq + 1
							
order id year Q start end startq endq Ref StartRef EndRef IsIn

* Define minyear before dropping spells
egen minyear = min(year), by(id)
egen maxyear = max(year), by(id)
keep if IsInReference == 1 // drop 3m

* Keep only highest paying spell in quarter in reference week
order id year Q Qw
sort id year Q Qw
bys id year Q (Qw) : gen highest = (_n==_N)
drop if highest == 0 // drop 2.1m
drop highest

drop StartRef EndRef IsInRef
rename Q q

order id year q qnwc qgwc

save "dads_1", replace

gdistinct id //   1 498 997 individuals, for a total of 34m spell-quarter

}

*** Fill in unemployment periods
{

** Prepare filling dataset
clear
set obs 14
gen year = 1993 + _n
expand 4
sort year
bys year: gen q = _n
save "temp/allq", replace

** Create full list of id year q
use id minyear maxyear using "dads_1", clear

sort id
bys id: keep if _n == 1

cross using "temp/allq" // resulting database has 44m obs (2min run)

* drop extreme observations to keep dataset manageable
* also drop: 2 years before entry or 2 years after last obs simply considered
* similar requirement to initial entry / final exit
drop if year < minyear - 2 | year > maxyear + 2

save "temp/allidq", replace // 2.3 GB

use "temp/allidq", clear
merge 1:1 id year q using "dads_1" // 17:54

tab _m if age >= 25 | age <= 60 // 67% matched

gen employed = _m == 3
drop _m



** Some check for workers who enter old in the middle of the sample
gen TotalYears = maxyear - minyear + 1

* Drop variables
drop nnifict Qw dp sex bc
order id year q employed startq endq daysq start end days

* Save

save "dads_2", replace
erase "temp/allidq.dta"

}

}




					************************************************************
					*** DATA CONSTRUCTION : FRANCE: CZ AND SKILLS FROM DADS ****
					************************************************************

{

*** Prepare commuting zone crosswalk
{

cd "$root"

* Basic crosswalk: start by importing INSEE data for 2011 CZ
use "source/ze", clear
gen str5 comr = string(com,"%05.0f")
drop com
save "output/temp/zer", replace

cd "$root/output"

* List of comr in dads
use comr using "dads_2", clear
bys comr: keep if _n==1
drop if comr == ""
save "temp/dads_comr", replace

use "temp/dads_comr", clear
merge m:1 comr using "temp/zer"

order comr _m ze

** Create crosswalk of dads comr to the comr classification of the zer file, which comes from the 2010 INSEE classification
gen comr2 = ""

* 1. Arrondissements
replace comr2 = "75056" if substr(comr,1,2) == "75" // 60 changes - some other weird municipalities e.g. 75231 which do not exist are changed along the way. But since the 75 part comes from the departement code, we still attribute them to the Paris municipality
replace comr2 = "69123" if substr(comr,1,4)=="6938" & inlist(substr(comr,5,1),"1","2","3","4","5","6","7","8","9") // 9 changes
replace comr2 = "13055" if substr(comr,1,3)=="132" & ( inlist(substr(comr,4,2),"01","02","03","04","05","06","07","08","09") | inlist(substr(comr,4,2),"10","11","12","13","14","15","16") ) // 16 changes

save "temp/dads_comr_1", replace


* 2. Historical code changes
import delimited "$root/source/historiq2018.txt", clear 

rename dep dep_orig
drop ar ct leg jo eff c_loff c_lanc arranc ctanc tnc* ncc*
rename com com_orig
rename dtr last_date
rename mod modification_type
rename nbcom number_of_com_involved_in_change
rename comech com_codes_involved_in_change
rename popech population_from_or_to_new
	* to: modification_type = 620
	* from: modification_type = 600,610
rename suech surface_from_or_to_new
rename depanc dep_old

* Residual: consider new ZEs and keep if no variation
keep dep_orig com_orig mod com_codes pop surf
gen str5 com_orig2 = string(com_orig,"%03.0f")
drop com_orig
rename com_orig2 com_orig
replace com_orig = dep_orig + com_orig
drop dep_orig
drop if com_codes==""

* 9k observations
rename com_orig comr
merge m:1 comr using "temp/zer" // 3k not matched from using, 6k matched
rename comr com_orig
keep if inlist(_m,1,3)
drop _m
rename ze ze_orig

rename com_codes comr
merge m:1 comr using "temp/zer" // 3k not matched from using, 6k matched
rename comr com_codes
keep if inlist(_m,1,3)
drop _m
rename ze ze_codes

order com_orig com_codes ze_orig ze_codes mod

count if ze_orig == . & ze_codes == . // 164

sort com_orig
egen distinct_ze_codes_by_orig = nvals(ze_codes), by(com_orig)
tab distinct_ze_codes

egen distinct_ze_orig_by_codes = nvals(ze_orig), by(com_codes)
tab distinct_ze_orig 

* Mapping orig to codes
preserve

keep com_orig com_codes ze_codes distinct_ze_codes

keep if distinct == 1
drop distinct

rename com_orig comr
rename com_codes comr2010
rename ze_codes ze2010

save "temp/HistoricalMappingTo2010ZE_1", replace

restore

* Mapping codes to orig
preserve

keep com_orig com_codes ze_orig distinct_ze_orig

keep if distinct == 1
drop distinct

rename com_codes comr
rename com_orig comr2010
rename ze_orig ze2010

save "temp/HistoricalMappingTo2010ZE_2", replace

restore

use "temp/HistoricalMappingTo2010ZE_1", clear
append using "temp/HistoricalMappingTo2010ZE_2"

egen distinct = nvals(ze2010), by(comr)
tab distinct

drop comr2010

bys comr: keep if _n==1
drop distinct

save "HistoricalMappingTo2010ZE", replace

* Apply mapping to dads comr
use "temp/dads_comr_1", clear

rename comr comr1
rename ze ze1
rename _m old_merge

rename comr2 comr
merge m:1 comr using "temp/zer", keep(1 3)

replace ze1 = ze if _m==3
drop _m

rename comr comr2
rename comr1 comr
drop ze
rename ze1 ze

merge m:1 comr using "HistoricalMappingTo2010ZE", nogen keep(1 3) // matched 3k

replace ze = ze2010 if ze == . & ze2010 != . // 1500 saves
drop comr2 ze2010 old_merge

count if ze == . // still 3800 missing ze
save "temp/dads_comr_2", replace
* In some of the remaining cases, we have postal codes instead of insee codes


* Get postal vs insee correspondance
import delimited "$root/source/correspondances-code-insee-code-postal.csv", varnames(1) clear 
tostring codeinsee, replace force

gen insee = substr(codeinsee,1,5)
gen postal = substr(codepostal,1,5)

keep insee postal

egen distinct_insee = nvals(insee), by(postal)
egen distinct_postal = nvals(postal), by(insee)

tab distinct_insee
tab distinct_postal

save "InseeToPostalCommuneCodes", replace

* Apply to last missing values
use "temp/dads_comr_2", clear

	* Merge with postal to insee correspondance
rename comr postal
merge 1:m postal using "InseeToPostalCommuneCodes", nogen keep(1 3) //  matched 1300

rename ze ze1
rename postal comr1
rename insee comr
drop dist*

	* Merge with ze data
merge m:1 comr using "temp/zer", nogen keep(1 3) // match 1300
	* Pick one in case of multiple insee codes
egen ze2 = max(ze), by(comr)

replace ze1 = ze2 if ze1 == . & ze2 != . // save 700
drop comr
rename comr1 comr
drop ze ze2
rename ze1 ze

bys comr: keep if _n == 1

save "temp/dads_comr_3", replace

* additional postal codes from La Poste

import excel "$root/source/laposte_hexasmal.xlsx", sheet("Sheet") firstrow clear
keep Code*
rename Code_postal postal
rename Code insee

egen distinct_insee = nvals(insee), by(postal)
egen distinct_postal = nvals(postal), by(insee)

tab distinct_insee
tab distinct_postal

save "InseeToPostalCommuneCodes2", replace


* Apply to last missing values
use "temp/dads_comr_3", clear
rename comr postal
merge 1:m postal using "InseeToPostalCommuneCodes2", nogen keep(1 3) //  matched 1300

rename ze ze1
rename postal comr1
rename insee comr
drop dist*

merge m:1 comr using "temp/zer", nogen keep(1 3) // match 37000
	* pick one in case of multiple insee codes
egen ze2 = max(ze), by(comr)

replace ze1 = ze2 if ze1 == . & ze2 != . // save 11k
drop comr
rename comr1 comr
drop ze ze2
rename ze1 ze

bys comr: keep if _n == 1

save "temp/dads_comr_4", replace

* Some codes seem str secondary postal codes of cities we attribute the last missing values
* based on the departement. we attribute a departement to a commuting zone based on population

cd "$root"

use "source/Unemployment2007", clear
gen str5 com = string(comr,"%05.0f")
drop comr
rename com comr
save "output/Census2007", replace

cd "$root/output"

use "temp/zer", clear
merge 1:1 comr using "Census2007", nogen keep(1 3)

gen dep = substr(comr,1,2)
egen pop = sum(Labor), by(dep ze)
keep dep ze pop
order dep ze pop
drop if dep == "."
bys dep ze: keep if _n==1

egen SumPop = sum(pop), by(ze)
bys ze: gen share = pop / SumPop

sort ze
su share, d

* commuting zones are typically well contained in departements
drop SumPop share
egen SumPop = sum(pop), by(dep)
bys ze: gen share = pop / SumPop
sort dep
su share, d

* but departements are not well contained in commuting zones
sort dep share
bys dep (share): keep if _n == _N
keep dep ze

save "DepartementToZEApproximateCrosswalk", replace

* Apply crosswalk to last observations without valid commune code

use "temp/dads_comr_4", clear
rename ze ze1

gen dep = substr(comr,1,2)

merge m:1 dep using "DepartementToZEApproximateCrosswalk", nogen keep(1 3)

replace ze1 = ze if ze1 == . // saved 400

drop dep ze
rename ze1 ze

save "temp/dads_comr_5", replace

save "CrosswalkCommuneToZoneEmploi", replace

}


*** Inflation, CZ area and population data
{

* Inflation

import excel "$root/source/econ-gen-taux-inflation.xls", sheet("Données") firstrow clear
drop if _n < 4 
drop if _n >= 28
rename Taux year
rename B inflation
drop C
destring year, replace force
destring inflation, replace force

set obs 28
replace year = 1990 if year == .
replace infl = 0 if infl == .

gen linf = log( 1 + inflation / 100 )
sort year
gen lipc = sum(linf)
gen ipc = exp(lipc)
keep year ipc

save "ipc", replace

* Get area
import delimited "$root/source/villes_france.csv", delimiter(comma) clear 

rename v11 com
rename v19 area

keep com area

save "MunicipalityArea", replace

* Clean population and wage data
use "$root/source/WagesPopPostescomrExport", clear

* Deflate wages to 1994 euros
merge m:1 year using "ipc", nogen keep(1 3)
replace ze_wage = ze_wage / ipc
drop ipc
sort ze year

* Need to detrend population

* Euro conversion before 1999 included
replace ze_wage = ze_wage * 0.1524 if year <= 1999

* Drop ZE that I do not have in all years
gen n = 1
egen NY = sum(n), by(ze)
tab NY // 4% with not all years
drop if NY < 20
drop n NY

* Keep only 1994 and 2002 values
keep if year == 1994 | year == 2002
drop nobs
rename Perm ze_pop

reshape wide ze_pop ze_wage, i(ze) j(year)

save "WagesPopPostesCleaned9402", replace

* More careful cleaning to keep all years
{

	* Clean wages and population
use "$root/source/WagesPopPostescomrExport", clear

* Deflate wages to 1994 euros
merge m:1 year using "ipc", nogen keep(1 3)
replace ze_wage = ze_wage / ipc
drop ipc
sort ze year

* Need to detrend population

* Euro conversion before 1999 included
replace ze_wage = ze_wage * 0.1524 if year <= 1999

* Drop ZE that I do not have in all years
gen n = 1
egen NY = sum(n), by(ze)
tab NY // 4% with not all years
drop if NY < 20
drop n NY

preserve
fcollapse (mean) Perm ze_wage, by(year)
line Perm year, name(Pop, replace)
line ze_wage year, name(Wage, replace)
restore
/*

	* 2002 is an (upward) outlier for wages
	* 2002-2003 is a break for population
	* 2008-2009 is a break for population
	* 2010 is a (downward)  outlier for population

*/

* 1. adjust wage

egen w01 = max( ze_wage * ( year == 2001 ) ), by(ze)
egen w03 = max( ze_wage * ( year == 2003 ) ), by(ze)

replace ze_wage = w01 * sqrt( w03 / w01 ) if year == 2002
drop w01 w03

* adjust population in 2010

egen p09 = max( Perm * ( year == 2009 ) ), by(ze)
egen p11 = max( Perm * ( year == 2011 ) ), by(ze)

replace Perm = p09 * sqrt( p11 / p09 ) if year == 2010
drop p09 p11

foreach t in 02 08 {

local tm1 = `t' - 1
local tp1 = `t' + 1
local tp2 = `t' + 2

egen pm = max( Perm * (year == 2000 + `tm1' ) ), by(ze)
egen pc = max( Perm * (year == 2000 + `t' ) ), by(ze)
egen pp = max( Perm * (year == 2000 + `tp1' ) ), by(ze)
egen ppp = max( Perm * (year == 2000 + `tp2' ) ), by(ze)

gen Frac = pc / pp * sqrt( pc / pm * ppp / pp )

replace Perm = Perm * Frac if year >= 2000 +`tp1'

keep ze year PermPop ze_wage

}

preserve
fcollapse (mean) Perm ze_wage, by(year)
line Perm year, name(Pop, replace)
line ze_wage year, name(Wage, replace)
restore

* Save
save "temp/WagesPopPostes_0", replace

* De-trend population and real wages: using current weights
use "temp/WagesPopPostes_0", clear

gen year0 = year - 1994

foreach var in ze_wage PermPop {
gen l`var' = log(`var')
reg l`var' year0 [w = PermPop]
replace l`var' = l`var' - _b[year0] * year0
replace `var' = exp(l`var')
}
drop year0 l*

preserve
fcollapse (mean) Perm ze_wage, by(year)
line Perm year, name(PopClean, replace)
line ze_wage year, name(WageClean, replace)
restore

save "WagesPopPostesCleaned", replace

* De-trend population and real wages: using 1994 or 2002 weights
foreach t in 1994 2002 {

use "temp/WagesPopPostes_0", clear
keep if year >= `t'

gen year0 = year - `t'
egen pop0 = max( Perm * (year == `t' ) ), by(ze)

foreach var in ze_wage PermPop {
gen l`var' = log(`var')
reg l`var' year0 [w = pop0]
replace l`var' = l`var' - _b[year0] * year0
replace `var' = exp(l`var')
}
drop year0 l*

preserve
fcollapse (mean) Perm ze_wage, by(year)
line Perm year, name(PopCleanInitial`t', replace)
line ze_wage year, name(WageCleanInitial`t', replace)
restore

save "WagesPopPostesCleanedW`t'", replace
}


}


* CZ area and size by year

use "temp/zer", clear
rename comr com
merge 1:1 com using "MunicipalityArea"

keep if inlist(_m,1,3)
drop _m

order com area ze

fcollapse (sum) area , by(ze)

save "temp/ZeArea", replace

* Current weighting for de-trend
use "temp/ZeArea", clear

merge 1:m ze using "WagesPopPostesCleaned", nogen keep(3) // only 1 not matched
rename area ze_area

gen ze_dens = Perm / ze_area
drop Perm ze_area

foreach var in ze_wage ze_dens {
gen l`var' = log(`var')
}

keep ze lze* year
sort ze year
save "ZeWageDensCurrent", replace

* 1994 and 2002 weighting for de-trend
foreach t in 1994 2002 {

use "temp/ZeArea", clear

merge 1:m ze using "WagesPopPostesCleanedW`t'", nogen keep(3) // only 1 not matched
rename area ze_area

gen ze_dens = Perm / ze_area
drop Perm ze_area

foreach var in ze_wage ze_dens {
gen l`var' = log(`var')
}

keep ze lze* year
sort ze year
save "ZeWageDensCurrentW`t'", replace

}

}

*** CZ identifiers and skills
{

use "dads_2", clear

* Merge with CZ dentifiers
merge m:1 comr using "CrosswalkCommuneToZoneEmploi", nogen keep(1 3) // matched 25m, not matched 14m

count if employed == 1 // 24m
count if ze == . & employed == 1 // 220k, so about 0.6%

* Wage measure
gen lqgwc = log(qgwc)

* Tenure
gen tenure = floor(year - date_join)

* Sort
gsort id year q

* destring cs2
destring cs2, replace force

* destring siren and nic
replace siren = "0" if siren == "ETAT"
destring siren, replace force

destring nic, replace force
* siret will need to be defined by 10000 * siren + nic
* because nic was a 4-digit string (usually with many leading zeros)

* define individual identifiers
gegen gid = group(id)

* fill in age
gegen age0 = min(age), by(id)
gen age1 = age0 + year - minyear	
corr age age1 // 0.9991
drop age age0
rename age1 age

gen agebin = floor(age/5)

* Save
save "dads_4", replace


* Mincer regression with cs2
use "dads_4", clear

* Reduce dataset for speed
keep if employed == 1
keep id year q agebin lqgwc tenure ze cs2 siren

* Skill only based on cs2
reghdfe lqgwc, a(Age = agebin Cs2 = cs2 year ze siren) // takes ~5 minutes for cs2

	* Ensure skill is defined wherever applicable --- fill in values if missing for some workers
gegen AgeM = max(Age), by(agebin)
gegen Cs2M = max(Cs2), by(cs2)
	* Define skill
gen skill = AgeM + Cs2M
	* Extend skill at full individual level
gegen Skill = mean(skill), by(id)

	* Keep only one observation per individul to rank skills in quantiles
sort id
bys id: keep if _n == 1

xtile skillbin = Skill, nq(300) // ~ as many as cities. 468 possible combinations of agebin x cs2 (13 agebins, 36 cs2)
drop skill Skill
rename skillbin skill

keep id skill

save "skills", replace


* Mean CZ wages
use "dads_4", clear
keep if employed == 1 // Important otherwise weights a lot past wages of unemployed
fcollapse (mean) ze_wage = qgwc, by(ze year)
gen lze_wage = log(ze_wage)
save "temp/MeanLogWageCZ", replace

}


}




					****************************************
					*** DATA CONSTRUCTION : FRANCE: EEC ****
					****************************************

{

		*** PRODEGO

*** Show variables
{

use "$root2/source/EnqueteEmploi/EECall_2003/Stata/indiv031", clear
ShowVariables

use "$root2/source/EnqueteEmploi/EECall_2003/Stata/log031", clear
ShowVariables

}


*** Extract useful variables
{

* STC = 1 : a son compte ou salarie chef d'entreprise -> use wage data to discriminate
* TAM1B = 1  : a son compte. = 2 : non
* TAM1D = 1 : dirigeant salarie (discriminate STC)

global VariablesT ///
	   acteu6 acper adfdap ag alcnc alct amois ancchomm ancrech ///
	   annee anp4a anp4b cite97   ///
	   com cse csa csei dep  dremc ident  ///
	   inscac inscam mchoe naia nat28 nctemp nivet nmdatp noi officc ///
	   salmee salmet sexe sp00 sp01 sp02 sp03 sp04 sp05 sp06 sp07 ///
	   sp08 sp09 sp10 sp11 datcoll datdeb lpr extri nboulo ttrref ///
	   anp1a anp2a anp4a anp4b inscac inscam comsal

global VariablesSelfEmployed stc tam1b tam1d stat2 statutr stnred

forvalues year = 2003(1)2007 {

	local y = substr("`year'",3,2)

	forvalues q = 1(1)4 {
	
	di "`year' - `q'"

if `year' <= 2005 {
	use $VariablesT $VariablesSelfEmployed ///
		using "$root2/source/EnqueteEmploi/EECall_`year'/Stata/indiv`y'`q'", clear
	
	rename acteu6 act6

}
if `year' >= 2006 & `year'<= 2008  {
	use $VariablesT $VariablesSelfEmployed ///
			using "$root2/source/EnqueteEmploi/EECall_`year'/Stata/indiv`y'`q'", clear
	
	rename acteu6 act6
}
if `year' >= 2006 & `year' >= 2009  {
	use $VariablesT ///
			using "$root2/source/EnqueteEmploi/EECall_`year'/Stata/indiv`y'`q'", clear
	
	rename acteu6 act6
	rename lpr lprm
}

gen year = `year'
gen q = `q'

save "temp/EEC_`year'Q`q'", replace

}
}

clear
forvalues year = 2003(1)2007 {
	forvalues q = 1(1)4 {

append using "temp/EEC_`year'Q`q'"

}
}

save "EEC_01", replace

}


*** Basic clean
{

use "EEC_01", clear

keep if sexe == "1"
drop sexe

* Keep non-household heads to be as close as possible to DADS
drop lpr

drop datdeb datcoll // since the number of the dataset is the quarter

rename extri wt

drop nboulo

gen     typejob = "Regular" if ttrref == "1"
replace typejob = "Temporary" if ttrref != "1"
drop ttrref
	
gen     labforce = 1 if inlist(act6,"1","3","4") == 1
replace labforce = 0 if inlist(act6,"5","6") == 1

gen     unemp = 1 if labforce == 1 & inlist(act6,"3","4") == 1
replace unemp = 0 if labforce == 1 & inlist(act6,"1") == 1

drop act6


rename naia yearbirth
destring yearbirth, replace force

* Drop variables that are almost empty
drop salmet nmdatp

* U benef
gen registeredPE = ( officc == "1" )
drop offic

gen receivesUbenef = 1 if acper == "1"
replace receives = 0 if acper == "2"
drop acper

drop alct

gen     reasonNoUbenef = "ran out" if alcnc == "1"
replace reasonNoUbenef = "processing" if alcnc == "2"
replace reasonNoUbenef = "rejected" if alcnc == "3"
replace reasonNoUbenef = "inelegible" if alcnc == "4"
drop alcnc

drop mchoe // not well filled in

rename adfdap lastyearEmployed
destring lastyearEmployed, replace force

rename amois lastmonthEmployed
destring lastmonthEmployed, replace force

drop annee // redundant with year

* Wage
rename salmee wage
destring wage, replace force

* Looking for a job
rename dremc dayslookingforwork
drop nctemp // almost empty

rename ag age
destring age, replace force
keep if age >= 25 & age <= 52

* Keep non-French, because they will show up in DADS
drop nat28

* Education
gen     educ = 1 if inlist(cite97,"0..","1..","2..") == 1 // pre-high-school
replace educ = 2 if inlist(cite97,"3A.","3B.","3CL") == 1 // high-school
replace educ = 3 if inlist(cite97,"3CM","5B.","4..","5AS") == 1 // some college
replace educ = 4 if inlist(cite97,"5AL","5AM","6..") == 1 // college
drop cite97
drop nivet

rename cse occ2 // pcs 2003
rename csa lastocc2unemp

* Individual identifier
gen id0 = ident + noi
fegen id = group(id0)
drop ident noi id0

* Self-employment
egen mstc = max( stc == "3") , by(id)
drop if mstc == 1
drop mstc
gen     emptype = "self-employed, or CEO with wage" if stc == "1"
replace emptype = "employed" if stc == "2"
drop stc // filled in for only half of observations

gen     empcatjur = "within firm" if tam1b == "1"
replace empcatjur = "outside firm" if tam1b == "2"
drop tam1b // almost perfectly filled in when stc == 1

drop if empcatjur == "outside firm" // drop black market or non-profits: drop 17k
drop empcatjur

gen     empstatus = "CEO with wage" if tam1d == "1"
replace empstatus = "other"  if inlist(tam1d,"2","3","4","5")
drop tam1d

* Drop missing self-employment status
drop if emptype == "self-employed, or CEO with wage" & empstatus == "" // drop 22

* Define self-employment
gen selfemp = ( emptype == "self-employed, or CEO with wage" & empstatus == "other" ) == 1


* Correct for change after 2009
egen maxself = max(selfemp), by(year)
* tabstat maxself , stat(mean) by(year)

replace selfemp = . if maxself == 0
drop maxself

order id year q age educ labforce unemp dep yearbirth
sort id year q

* Enforce panel structure
bys id: gen Nobs = _N
tab Nobs

egen minyear = min(year), by(id)
egen minq0 = min( q ) , by(id year)
egen minq = max( minq0  * ( year == minyear ) ) , by(id)

gen index = ( year - minyear ) * 4 + q - minq + 1

egen MaxIndex = max(index), by(id)
egen MinIndex = min(index), by(id)

gen NobsPanel = MaxIndex - MinIndex + 1

gen dropit = NobsPanel != Nobs // 18k observations where not equal
drop if dropit == 1 // drop 18k
drop Nobs minyear minq0 minq MaxIndex MinIndex NobsPanel dropit

* Define an individual as unemployed in the quarter if the job is a temp job,
* to be consistent with DADS
tab typejob

* ANPE
rename anp1a RegisteredSinceLastEE_ANPE
rename anp2a CheckDateRegisteredANPE
rename anp4a NumDaysSinceLastJobOfferANPE
rename anp4b NumDaysSinceLastContactWithANPE
rename inscac YearRegisteredANPE
rename inscam MonthRegisteredANPE

* Unemp duration
rename ancchom UnempDurationMonths
rename ancrech SearchDurationGroup

* How found job
rename comsal HowFoundJob

* Save
order id year q age educ labforce unemp dep yearbirth
sort id year q

save "EEC_02", replace

}


*** Transition rates including self-employment correction
{

use "EEC_02", clear

* Correct for self-employment
replace labforce = 0 if selfemp == 1
replace unemp = . if selfemp == 1

* indicator to compute rates
sort id year q
bys id: gen NoRate =  _n == _N

* UE rate
sort id year q
gen UE = .
bys id: replace UE = 1 if unemp == 1 & unemp[_n+1] == 0 & labforce == 1 & _n < _N
bys id: replace UE = 0 if unemp == 1 & unemp[_n+1] == 1 & labforce == 1 & _n < _N

* UN rate
sort id year q
gen UN = .
bys id: replace UN = 1 if unemp == 1 & labforce == 1 & labforce[_n+1] == 0 & _n < _N
bys id: replace UN = 0 if unemp == 1 & labforce == 1 & labforce[_n+1] == 1 & _n < _N

* EU rate
sort id year q
gen EU = .
bys id: replace EU = 1 if unemp == 0 & unemp[_n+1] == 1 & labforce == 1 & _n < _N
bys id: replace EU = 0 if unemp == 0 & unemp[_n+1] == 0 & labforce == 1 & _n < _N

* EN rate
sort id year q
gen EN = .
bys id: replace EN = 1 if unemp == 0 & labforce == 1 & labforce[_n+1] == 0 & _n < _N
bys id: replace EN = 0 if unemp == 0 & labforce == 1 & labforce[_n+1] == 1 & _n < _N

* NU rate
sort id year q
gen NU = .
bys id: replace NU = 1 if unemp == . & labforce == 0 & unemp[_n+1] == 1 & _n < _N
bys id: replace NU = 0 if unemp == . & labforce == 0 & unemp[_n+1] == . & _n < _N

* NE rate
sort id year q
gen NE = .
bys id: replace NE = 1 if unemp == . & labforce == 0 & unemp[_n+1] == 0 & _n < _N
bys id: replace NE = 0 if unemp == . & labforce == 0 & unemp[_n+1] == . & _n < _N


save "EEC_03", replace

}


*** Checks on EEC
{

use "EEC_03", clear
fcollapse (mean) unemp EU UE , by(year)
tabstat unemp EU UE , stat(mean) by(year)

* Check self-employment
use "EEC_03", clear
keep if unemp == 0 | selfemp == 1
tabstat selfemp , stat(mean) by(year)

}

*** Construct crosswalk for DADS from EEC from Prodego: departement-level
{

* keep only 2008 and earlier because otherwise self-employment not accounted for

use "EEC_03", clear
global Variables unemp labforce EU UE EN NE NU UN

* Select on employed workers
egen maxindex = max(index), by(id)

egen meanU = mean(unemp) , by(dep)

egen DepUnempBin = xtile( meanU ) , nq(5)
tabstat unemp , stat(mean) by(DepUnempBin)

gen ageBin = age >= 40

count // 214,463

gen occ1 = substr(occ2,1,1)
gen lastocc1unemp = substr(lastocc2unemp,1,1)

replace occ1 = lastocc1unemp if occ1 == "" // replaced 10k

* trim blank spaces
gen occ11 = strltrim(occ1)
gen occ12 = strrtrim(occ11)

drop occ1 occ11
rename occ12 occ12

count if occ1 == "" // 4.7k

* Drop individuals with no known occupation at all
gen MissingOcc = inlist(occ1,"0","1","2","3","4","5","6","7","8") == 0
egen NoOcc = min( MissingOcc ) ,  by(id)

tab NoOcc
drop if NoOcc == 1  // 3k cases


egen distinctOcc1 = nvals( occ1 ) , by(id) // 19k missing
tab distinctOcc

egen FreqOcc = nvals(occ1) , by(id occ1)
egen MaxFreqOcc = max( FreqOcc ) , by(id)

sort id index

destring occ1, replace force
egen MostFreqOcc = max( occ1 * ( FreqOcc == MaxFreqOcc ) ) , by(id)
drop if MostFreqOcc == . // drop 0
drop if MostFreqOcc == 0

drop occ1
rename MostFreqOcc occ1
tab occ1

* Drop individuals with csp = 0 (unknown) 7 (retired) and 8 (inactives)
drop if inlist(occ1,0,7,8) == 1 // drop 1500

gen n = 1

global Variables unemp labforce EU UE EN NE UN NU

collapse (mean) $Variables (sum) n [ w = wt ] , by(occ1 DepUnempBin ageBin)
	* No agebin to reduce number of groups
su n , d


* Check adjustment

foreach var in $Variables {
gen l`var' = log(`var')
}

foreach var in $Variables {

qui reghdfe `var' [ w = n ], a(occFE = occ1 cityFE = DepUnempBin ageFE = ageBin)
qui su `var' [ w = n ]
gen `var'adj = r(mean) + occFE + cityFE + ageFE
drop occFE cityFE ageFE


qui reghdfe l`var' [ w = n ], a(occFE = occ1 cityFE = DepUnempBin ageFE = ageBin)
qui su l`var' [ w = n ]
gen `var'ladj = exp( r(mean) + occFE + cityFE + ageFE )
drop occFE cityFE ageFE

}

foreach var in $Variables {

di "`var'"
qui corr `var' `var'adj [ w = n ]
local cor = r(rho)
local cor : di %3.2f `cor'
di "   level : `cor'"

qui corr `var' `var'ladj [ w = n ]
local cor = r(rho)
local cor : di %3.2f `cor'
di "   log   : `cor'"

}


* For occ1 = 1 and 2 (agriculture and business owners) :
* attribute the economy-wide average for rates
* because too many missing

save "EEC_TransitionRates", replace

}
		
*** Prepare transition rates with non-participation from EEC from Prodego
{

use "EEC_TransitionRates", clear

global Variables unemp labforce EU UE EN NE NU UN

su $Variables [ w = n ]

* replace missing by unconditional averages

foreach var in $Variables {

qui su `var' [ w = n ]
local meanvar = r(mean)
replace `var'adj = `meanvar' if `var'adj == .
replace `var'ladj = `meanvar' if `var'ladj == .

}

keep Dep ageBin occ1 *ladj

foreach var in $Variables {
rename `var' `var'_EE
}

rename unemp U_EE

save "temp/EETransProbaQuarterly", replace

}

*** CASD EXPORT: Construct crosswalk for DADS from EEC from CASD export
{

use "$root/source/EEC_transitions", clear

global Variables unemp labforce EU UE EN NE NU UN

foreach var in $Variables {
gen l`var' = log(`var')
}

foreach var in $Variables {

* Restrict regression for interior values for symmetry between level and log adjustment
qui reghdfe `var' if `var' > 0 & `var' < 1 [ w = NumberObs ], a(occFE = occ1 cityFE = CityUnempBin ageFE = ageBin)
qui su `var' [ w = NumberObs ]
gen `var'adj = r(mean) + occFE + cityFE + ageFE
drop occFE cityFE ageFE


qui reghdfe l`var' if `var' > 0 & `var' < 1 [ w = NumberObs ], a(occFE = occ1 cityFE = CityUnempBin ageFE = ageBin)
qui su l`var' [ w = NumberObs ]
gen `var'ladj = exp( r(mean) + occFE + cityFE + ageFE )
drop occFE cityFE ageFE

}

* Replace missing by simple averages of location and occupation
foreach var in $Variables {

forvalues i = 1(1)5 {
qui su `var' [ w = NumberObs ] if CityUnempBin == `i'
local meanvarCity`i' = r(mean)
}

forvalues j=1(1)8 {
qui su `var' [ w = NumberObs ] if occ1 == `j'
local meanvarOcc`j' = r(mean)
}

forvalues i = 1(1)5 {
forvalues j=1(1)8 {
foreach Var in `var'adj `var'ladj {

replace `Var' = 0.5 * ( `meanvarCity`i'' + `meanvarOcc`j'' ) ///
		if `Var' == . & CityUnempBin == `i' & occ1 == `j'
		
}
}
}

}

* Replace remaining missing by unconditional averages ;  0 actual changes
foreach var in $Variables {

qui su `var' [ w = NumberObs ]
local meanvar = r(mean)

replace `var'adj = `meanvar' if `var'adj == .
replace `var'ladj = `meanvar' if `var'ladj == .

}



* Check correlation
foreach var in $Variables {

di "`var'"
qui corr `var' `var'adj [ w = NumberObs ]
local cor = r(rho)
local cor : di %3.2f `cor'
di "   level : `cor'"

qui corr `var' `var'ladj [ w = NumberObs ]
local cor = r(rho)
local cor : di %3.2f `cor'
di "   log   : `cor'"

}


keep City ageBin occ1 *ladj Num

foreach var in $Variables {
rename `var' `var'_EE
}

drop unemp labforce
tostring occ1, replace force

preserve
drop Num 

save "EETransProbaQuarterlyCASD", replace
restore

* No location data
drop City

foreach var in EU UE EN NE NU UN {
rename `var'_EE `var'
}

collapse (mean) EU UE EN NE NU UN [ w = Num ] , by (occ1 ageBin)

foreach var in EU UE EN NE NU UN {
rename `var' `var'_EEocc
}

save "EEoccTransProbaQuarterlyCASD", replace


}


}




					***********************************
					*** DATA CONSTRUCTION : FRANCE: ***
					*** AMENITIES AND HOUSE PRICES  ***
					***********************************
					
					
{

*** Weather data
{

** Rainfall and temperature
{

foreach month in Jan April July October {

import excel "$root2/source/amenities/TempRain`month'2007.xlsx", sheet("Sheet1") firstrow clear

* Get departement froms station
split Station , p(" (")
drop Station Station1
rename Station2 dep

replace dep = substr(dep,1,2)
destring dep, replace force
keep if dep != .
distinct dep // 88

* Get rainfall
rename Pr rainfall
split rainfall , p(" m")
drop rainfall rainfall2
rename rainfall1 rainfall
destring rainfall, replace force

* Get temperatures
drop Tempmaxabs Tempminabs
rename Tempmaxmoy tmax
rename Tempminmoy tmin

foreach var in tmin tmax {
split `var' , p(" °")
drop `var' `var'2
rename `var'1 `var'
destring `var', replace force
}
gen temperature = ( tmax + tmin ) / 2
drop tmin tmax

gen month = "`month'"

save "temp/TempRain`month'2007", replace

}

clear
foreach month in Jan April July October {
append using "temp/TempRain`month'2007"
}

fcollapse (mean) rainfall temp , by(dep)

gen str2 deps = string(dep,"%02.0f")
drop dep 
rename deps dep
order dep temp rain

save "temp/TempRain2007Dep", replace

}

** Sun hours
{
import excel "$root2/source/amenities/SunHours.xlsx", sheet("Sheet1") firstrow clear
drop rank name
destring dep, replace force
drop if dep == .

gen str2 deps = string(dep,"%02.0f")
drop dep 
rename deps dep

save "temp/sun", replace
}

** Merge
{
use "temp/TempRain2007Dep", clear
merge 1:1 dep using "temp/sun", nogen keep(1 3) // all matched
rename SunHours sunhours
save "TempRainSun2007Dep", replace
}

}

cd "${root2}/output"

*** Residential amenities data : basic
{

* load BPE 2007
use "${root2}/amenities/source/BPE 2007.dta/Stata/bpe07_ensemble", clear

keep depcom nb_equip typequ
rename depcom comr
rename nb_equip nb
rename typequ type

* Collapse
collapse (sum) nb, by(comr type) // divides size by two, because of iris subdivisions

* Save 
save "temp/bpe07_0", replace

}

*** Residential amenities data : advanced
{

* Get labels of equipments
import dbase "${root2}/source/amenities/varmod_ensemble.dbf", clear
keep if VARIABLE == "TYPEQU"
drop VARIABLE
rename MODALITE type
rename MOD name

gen cat = ""
replace cat = "Basic public" ///
	if substr(type,1,2) == "A1" || type == "A201"
	
replace cat = "Commercial" ///
	if inlist(type,"A203","A501","A502","A504","A506","A507","B102") | ///
	substr(type,1,2) == "B2" | substr(type,1,2) == "B3"
	
replace cat = "Education" if ///
	inlist( substr(type,1,2),"C1","C2","C3","C4" ) 
	
replace cat = "Health" if ///
	substr(type,1,1) == "D" & substr(type,1,2) == "D7"
replace cat = "Tourism" if  substr(type,1,1) == "G" 

save "temp/AmenitiesType", replace

* Put to cross format
use "temp/bpe07_0", clear
merge m:1 type using "temp/AmenitiesType", nogen keep(3)
fcollapse (sum) nb , by(comr cat)
save "temp/bpe07_1", replace

collapse (first) nb, by(comr)
keep comr
save "temp/bpe07_0_com", replace

use "temp/bpe07_0", clear
merge m:1 type using "temp/AmenitiesType", nogen keep(3)
collapse (first) nb, by(cat)
keep cat

cross using "temp/bpe07_0_com"
erase "temp/bpe07_0_com.dta"

merge 1:1 comr cat using "temp/bpe07_1", nogen keep(1 3)
replace nb = 0 if nb==.

* Merge with population
merge m:1 comr using "$root/source/pop07.dta", nogen keep(3)

* Density of equipments
gen ldnb = log( 1 + nb/pop )
gen dnb = nb/pop
drop nb

gen dep = substr(comr,1,2)
save "bpe2007", replace

}


*** House prices: public source
{
	
import delimited "/Users/adrien/Dropbox/JMP/7_statarev/source/HousePrices/valeursfoncieres-2018.txt", clear 

* basic clean
keep if inlist(naturemutation,"Vente")
keep if inlist(nombredelots  ,0,1)                 
* keep if inlist(typelocal     ,"Appartement","Maison")
save "temp/HousePriceNotaires", replace	
	
	
use "temp/HousePriceNotaires", clear

* clean value
rename valeurfonciere value
replace value = substr(value,1,length(value)-3)
destring value, replace force
keep if value ~= .

* clean area
rename surfacereellebati area	
drop if area == 0

* clean number of rooms
rename nombrepiecesprincipales numrooms

* clean commune
destring codedep, replace force
gen comr = 1000 * codedep + codecom
drop if comr == .
tostring comr, replace force

* remove all other variables
keep comr value area numrooms
order comr value area numrooms

* keep only large enough transactions
keep if value > 1e4

* generate and clean price per square meter
gen valuepsqm = value / area
keep if valuepsqm == . | ( valuepsqm > 500 & valuepsqm < 1e6 )

save "temp/HousePriceNotaires_2", replace	


use "temp/HousePriceNotaires_2", clear

* keep only if area is filled in
keep if area ~= .

* aggregate by comr
gcollapse (sum) value area, by(comr)
gen valuepsqm = value / area
keep if valuepsqm < 2*1e4 // to avoid small sample problems in some communes

save "temp/HousePriceNotaires_3", replace

* merge private and public source
use "temp/HousePriceNotaires_3", clear
keep comr  valuepsqm
rename valuepsqm housepriceNotaires

save "HousePrices2011Com_2", replace

}

*** Merge all and aggregate at the CZ level
{

use "bpe2007", clear
drop if cat == ""
gen cat2 = subinstr(cat," ","",.)
drop cat
reshape wide dnb ldnb , i(comr) j(cat2) s
order comr dep pop

rename comr com
merge m:1 com using "MunicipalityArea", nogen
rename com comr

merge m:1 comr using "HousePrices2011Com_2", nogen
/*
   Result                           # of obs.
    -----------------------------------------
    not matched                        12,299
        from master                       115  
        from using                     12,184  

    matched                            26,061  
    -----------------------------------------
*/

merge m:1 dep using "TempRainSun2007Dep", nogen
/*

    Result                           # of obs.
    -----------------------------------------
    not matched                        12,487
        from master                    12,487  
        from using                          0  

    matched                            25,873  
    -----------------------------------------
*/

drop ldnb*

merge 1:1 comr using "CrosswalkCommuneToZoneEmploi", nogen
/*
    Result                           # of obs.
    -----------------------------------------
    not matched                         5,392
        from master                     1,653  
        from using                      3,739  

    matched                            36,707  
    -----------------------------------------
*/

save "temp/AmenitiesHousePrices_0", replace


use "temp/AmenitiesHousePrices_0", clear

keep if ze != .

foreach var in Basic Commercial Education Health Tourism {
replace dnb`var' = dnb`var' * pop
}

egen spop = sum(pop), by(ze)
gen popshare = pop / spop

foreach var in housepriceNotaires temp rainfall sunhours {
replace `var' = popshare * `var' // for those var, weight by population
								 // to get average per capita exposure 
}

fcollapse (sum) dnb* pop temp housepriceNotaires rainfall sunhours area , by(ze)

* drop if ze >= 100 & ze < 1000 // dom-tom, no data

foreach var in Basic Commercial Education Health Tourism {
rename dnb`var' Num`var'
}
order ze pop temp rainfall sunhours Num* area housepriceNotaires 

save "AmenitiesHousePrices2007ZE", replace


}


}					
					
					
										
					*************************************
					*** DATA CONSTRUCTION : FRANCE:  ****
					*** MERGE DADS AND EEC   		 ****
					*************************************

						

{

*** Fill in age ze and dep and occ1 for merge with EEC 
{

use "dads_4", clear
	
* Reduce the size of the dataset for speed: drop variables not used in sequel
drop start* end* day* qnwc nwc birth_dep date_join dgwc dnwc firm_size plant_size payroll
gsort gid year q

* Index quarters non-employed
gsort id year q
gen qne = 0
bys gid: replace qne = 1 if employed == 0 & employed[_n-1] == 1
bys gid: replace qne = qne[_n-1] + 1 if employed == 0 & employed[_n-1] == 0

* Index NE-spells
gsort gid year q
bys gid: gen NEspell = sum( qne[_n-1] == 0 & qne == 1 )	
replace NEspell = . if employed == 1
gegen TopQne = max(qne) , by(id NEspell) 
gen MidQne = floor(TopQne / 2 ) + 1

* Age bin
gen ageBin = age >= 40

* Occupation 1 digit
gen occ1 = floor(cs2/10)

* Attribute unique occ1 to individual to avoid missing values in subsequent merge
gegen FreqOcc = nvals(occ1) , by(gid occ1)
gegen MaxFreqOcc = max( FreqOcc ) , by(id)

destring occ1, replace force
gegen MostFreqOcc = max( occ1 * ( FreqOcc == MaxFreqOcc ) ) , by(id)

drop occ1
rename MostFreqOcc occ1

* drop pre-retired people and individuals with missing occ1
tab occ1
drop if occ1 == 7 | occ1 == . // drop 2.5m
su employed // mean = 0.80


		*** Fill in ze and occ1 in non-employment periods
		
order gid year q employed ze
gen dep = substr(comr,1,2)

local varliste dep ze

	* Detect first non-employment spell
sort gid year q
bys gid: gen FirstNESpell = _n == 1
replace FirstNESpell = 0 if employed == 1 & FirstNESpell == 1
bys gid: replace FirstNESpell = 1 if FirstNESpell[_n-1] == 1 & employed != 1 & _n > 1

* Fill ze and occ1 moving forward in time
foreach var in `varliste' {
sort id year q
bys id: replace `var' = `var'[_n-1] if employed != 1 & _n > 1
}

* Fill ze moving backward in time for the other half
* Fill in ze moving back time all the way for the first non-employment spell
foreach var in `varliste' {
gsort gid -year -q
* bys id: replace `var' = `var'[_n-1] if employed != 1 & _n > 1 & qne >= MidQne
bys gid: replace `var' = `var'[_n-1] if employed != 1 & _n > 1 & FirstNESpell == 1
}

drop FirstNESpell
sort gid year q

save "temp/dads_4_00", replace

}

*** Compute ZE bins in DADS
{

use age year minyear ze employed qne using "temp/dads_4_00", clear

* compute unemployment with current variables
keep if age >= 30 & age <= 52
keep if year > minyear & year >= 2003
keep if qne <= 8

gen U = 1 - employed 
gen pop = 1
drop if ze == .

fcollapse (mean) U (sum) pop , by(ze)

egen CityUnempBin = xtile( U ) , nq(5) weights(pop)

drop pop U

save "temp/uRateZeBinsDADS", replace
}

*** Crosswalk in EEC between city quintiles and ze
{

use "$root/source/CityMeans", clear
drop if City == . // important for merge later on

keep City Num unemp
egen CityUnempBin = xtile( unemp ),  nq(5) weights( Num )
keep City CityUnempBin
rename City ze

save "temp/EECunempBins", replace

}

*** Merge with EEC
{

use "temp/dads_4_00", clear

* Merge with EEC
merge m:1 ze using "temp/EECunempBins"
rename CityUnempBin CityUnempBinEEC
drop _m

* Complement missing ze in EEC
merge m:1 ze using "temp/uRateZeBinsDADS"
drop _m

replace CityUnempBinEEC = CityUnempBin if CityUnempBinEEC == .
drop CityUnempBin
rename CityUnempBinEEC CityUnempBin

* Some individuals will be unmatched because they do not have a ze
* For those, use the average rates based on occupation only
tostring occ1, replace force

* Merge with EEC transition probabilities with city info
merge m:1 CityUnempBin ageBin occ1 using "EETransProbaQuarterlyCASD"

* Drop unmatched observations from EEC
drop if _m == 2
drop _m

* Although observation look matched, some have missing ze because of merged in datasets
count if ze == . // 5m

foreach var in EU UE EN NE UN NU {
replace `var'_EE = . if ze == .
}

* Merge with EEC transition probabilities without city info
merge m:1 ageBin occ1 using "EEoccTransProbaQuarterlyCASD"

foreach var in EU UE EN NE UN NU {
replace `var'_EE = `var'_EEocc if `var'_EE == . // about 1m changes
}

drop _m
drop *EEocc
count if EU_EE == . // 0

* Sort and save
sort gid year q

set seed 27021991
gen random = runiform()

save "temp/dads_4_01", replace

}

*** Define unemployment vs. non-participation
{

use "temp/dads_4_01", clear

gen E = employed == 1
gen U = .
gen N = .

* ENE transitions
gen ENE = .
sort gid year q
bys gid: replace ENE = 1 if E == 1 & E[_n+1] == 0 & _n < _N
bys gid: replace ENE = 1 if E == 1 & _n == _N & year == 2007 & q < 4
bys gid: replace ENE = 0 if E == 1 & E[_n+1] == 1 & _n < _N
* su ENE

* Transition probabilites
gen EUcondENE = EU_EE / ( EU_EE + EN_EE )
su EUcondENE, d
gen UU_cond_U_and_notUE = ( 1 - UN_EE - UE_EE ) / ( 1 - UE_EE ) // always in 0-1
gen NN_cond_N_and_notNE = ( 1 - NU_EE - NE_EE ) / ( 1 - NE_EE ) // always in 0-1

* Individual index
sort id year q
bys id: gen NumObs = _N
bys id: gen index = _n
qui su NumObs
global maxN = r(max)
drop NumObs

* Other transitions
* Recursive approach to not generate missing values
forvalues i=2(1)$maxN {

di ""
di "******"
di "i = `i' / $maxN "

	* EU
	
bys id: replace U = 1 if ENE[_n-1] == 1 & _n == `i' & random <= EUcondENE
bys id: replace N = 0 if ENE[_n-1] == 1 & _n == `i' & random <= EUcondENE

	* EN
	
bys id: replace U = . if ENE[_n-1] == 1 & _n == `i' & random > EUcondENE
bys id: replace N = 1 if ENE[_n-1] == 1 & _n == `i' & random > EUcondENE
	
	* UU
	
bys id: replace U = 1 if U[_n-1] == 1 & E == 0 & _n == `i' & random <= UU_cond_U_and_notUE
bys id: replace N = 0 if U[_n-1] == 1 & E == 0 & _n == `i' & random <= UU_cond_U_and_notUE
	
	* UN
	
bys id: replace U = . if U[_n-1] == 1 & E == 0 & _n == `i' & random > UU_cond_U_and_notUE
bys id: replace N = 1 if U[_n-1] == 1 & E == 0 & _n == `i' & random > UU_cond_U_and_notUE

	* NN
	
bys id: replace U = . if N[_n-1] == 1 & E == 0 & _n == `i' & random <= NN_cond_N_and_notNE
bys id: replace N = 1 if N[_n-1] == 1 & E == 0 & _n == `i' & random <= NN_cond_N_and_notNE

	* NU
	
bys id: replace U = 1 if N[_n-1] == 1 & E == 0 & _n == `i' & random > NN_cond_N_and_notNE
bys id: replace N = 0 if N[_n-1] == 1 & E == 0 & _n == `i' & random > NN_cond_N_and_notNE

}

drop TopQne *UnempBin ageBin FreqOcc MaxFreqOcc *_EE *cond* annai comr comt
drop random index ENE NEspell

save "temp/dads_4_02", replace
}

*** Define EU, UE, etc. transitions
{

use "temp/dads_4_02", clear

replace U = 0 if U == . & E == 1
replace N = 0 if E == 1

gen labforce = 1 - N 

order id year q E U labforce N

* Some checks
su U // 0.13
su U if year > minyear // 0.15
su U if age >= 30 & age <= 52 & year > minyear // 0.11
su U if age >= 30 & age <= 52 & year > minyear & TotalYears >= 8 // 0.09
* EEC has 6%
sort gid year q

* EU and EN
foreach d in U N {

gen E`d' = 0 if E == 1
sort id year q
bys id: replace E`d' = 1 if E == 1 & `d'[_n+1] == 1 & _n < _N
bys id: replace E`d' = 1 if E == 1 & `d'[_n+1] == 1 & _n == _N & year == 2007 & q < 4

}

* UE and UN
foreach d in E N {

gen U`d' = 0 if U == 1
sort id year q
bys id: replace U`d' = 1 if U == 1 & `d'[_n+1] == 1 & _n < _N

}

* NE and NU
foreach d in E U {

gen N`d' = 0 if N == 1
sort id year q
bys id: replace N`d' = 1 if N == 1 & `d'[_n+1] == 1 & _n < _N

}

* EE
gen EE = 0 if E == 1
sort id year q
bys id: replace EE = 1 if E == 1 & E[_n+1] == 1 & _n < _N & siren[_n+1] != siren


sort gid year q

save "temp/dads_4_03", replace

}

*** Fill in other variables
{

use "temp/dads_4_03", clear

* Fill in covariates during unemployment: set to values of the last employment spell
* For the initial unemployment spell, we do not see those covariates -> changes the sample in regressions
local varliste gwc qgwc lqgwc tenure siren nic public apen apet occ1 cs2 cs4 // cs2 cs3 cs4  ; Skills are already merged by id

foreach var in `varliste'  {
di "`var'"

* Fill moving forward in time
sort gid year q
bys gid: replace `var' = `var'[_n-1] if employed != 1 & _n > 1

}

foreach var in `varliste'  {
di "`var'"

* Fill moving backward in time for half the spell
gsort gid -year -q
bys gid: replace `var' = `var'[_n-1] if employed != 1 & _n > 1 & qne >= MidQne

}

save "temp/dads_4_04", replace

}

*** Define useful variables
{

use "temp/dads_4_04", clear
* Merge with skills, CZ variables, CZ and firm wage premia
merge m:1 id using "skills", nogen keep(1 3) // 33k not matched, 41m matched
keep if skill != . // drop 173k

* Sort again
sort gid year q

* Industries
rename apet apet4

foreach i in 2 3 {
gen apet`i' = substr(apet4,1,`i')
}

* Create groups
	* Occupations
foreach c in 2 4 {
fegen gcs`c' = group(cs`c')
}
	* Industries
foreach i in 2 3 4 {
fegen gapet`i' = group(apet`i')
}

	* Workers, plants and firms
gen siret = 1e4 * siren + nic 

* Movers
egen NumZE = nvals(ze), by(id)
gen mover = NumZE > 1

* Movers in 2003-2007 only
gen ze03 = ze if year >= 2003
egen NumZE03 = nvals(ze03), by(id)
gen mover03 = NumZE03 > 1
drop ze03

* Firm switchers
egen NumFirm = nvals(siren), by(id)
gen switcher = NumFirm > 1

* Migration
sort gid year q
bys gid: gen Mig    = ze[_n+1] != ze[_n] & _n < _N
bys gid: gen MigEE  = ze[_n+1] != ze[_n] & _n < _N & EE[_n] == 1
bys gid: gen MigEUE = ze[_n+1] != ze[_n] & _n < _N & U[_n]  == 1
bys gid: gen MigENE = ze[_n+1] != ze[_n] & _n < _N & employed[_n]  == 0
bys gid: gen MigEEE = ze[_n+1] != ze[_n] & _n < _N & employed[_n]  == 1 & employed[_n+1] == 1

* Save
save "dads_5", replace
}

*** Smaller dataset for analysis
{

use "dads_5", clear

* Drop individuals before they first are employed
* Will burn the first year later on
keep if year > minyear // drop 12m
keep if year >= 1997 // drop 3m
keep if age >= 30 & age <= 52 // drop 11m

su U // 11%
su U if year <= maxyear // 8.5%
su U if TotalYear >= 8 // 9.5%

gegen p25 = max( pan25 == "1" ), by(gid)
drop pan25

drop if p25 == 0 & year < 2002 // drop 270k

su U // 11%
su U if year <= maxyear + 1 // 9.7%
su U if TotalYear >= 8 // 9.5%

keep if year <= maxyear + 1

drop N employed nic qgwc gwc MidQne Reference

rename skill Skill

order id year q age  labforce E U EU EN UE UN NE NU EE ///
	  ze lqgwc ///

* Sort
sort gid year q
count // 18m

* Construct consistent cs4
drop gcs4
preserve
keep if ( year == 2002 & q == 4 ) | ( year == 2003 & q == 1 )
keep if E == 1 & cs4 != ""
gsort gid year
bys gid: keep if _N == 2
keep gid cs4 year
rename cs4 cs

reshape wide cs, i(gid) j(year)
gen E = 1
gcollapse (sum) E, by(cs2002 cs2003)

* need to clean up code
gen cs02 = lower(cs2002)
gen cs03 = lower(cs2003)
drop *00*
drop if strlen(cs02) < 4 | strlen(cs03) < 4
drop if substr(cs02,1,1) == " " | substr(cs03,1,1) == " "

gegen SumE = sum(E), by(cs03)
gen ShareE = E / SumE

gsort cs03 ShareE
su ShareE, d

bys cs03: keep if _n == _N
keep cs02 cs03

save "CrossWalkCs4_2003to2002", replace
restore

rename cs4 cs03
merge m:1 cs03 using "CrossWalkCs4_2003to2002", nogen keep(1 3)

replace cs03 = cs02 if year >= 2003
drop cs02
rename cs03 cs4

gen cs3 = substr(cs4,1,3)
destring cs3, replace force
destring cs4, replace force

replace cs3 = . if cs3 == 999
replace cs4 = . if cs4 == 9999

* Save	
save "dads_6", replace	


}
	
	
}



					***********************************
					*** PRODUCTIVITY DATA: FRANCE  ****
					***********************************

{
	
*** Construct plant-level dataset with value added
{

** Prepare mapping from 3-digit occupations to skills
{

use Skill agebin cs2 using "dads_6", clear
keep if cs2 != . & agebin != .
gcollapse (mean) Skill, by(cs2 agebin)
save "OccSkill", replace

}

** Basic plant data from postes
{

* Plant-level data and merge with skills
forvalues t=2002(1)2007 { // 2002(1)2007    

if `t' <= 2005 {
	local CS cs
}
if `t' > 2005 {
	local CS 
}

use ident_s siren siret nic datdeb datfin s_brut eff_moy_et apet zempt pcs `CS' age using "/Users/adrien/Desktop/Perso/Data/DADS/Postes/source/dads`t'" if  datdeb != . , clear

rename datdeb start
rename datfin end
rename eff empet

if `t' <= 2005 {
	rename cs cs2
}
if `t' > 2005 {
	gen cs2 = substr(pcs,1,2)
	destring cs2, replace force
}
	
	
gen agebin = floor(age/5)

* Days and etp
gen days = end-start + 1
gen etp = days / 360 

* Occupations and skills
fmerge m:1 agebin cs2 using "OccSkill", keep(1 3) // important to keep non-matched obs. to accurately compute size
drop if substr(siren,1,1) == "F"

* Collapse
fcollapse (first) siren nic empet apet (max) zempt (sum) etp s_brut (mean) Skill, by(siret) // EU EUgroup

corr empet etp // 0.9906

* Save etp when empet is non-missing
replace etp = empet if etp == 0 & empet != . // 0 changes

drop empet
rename etp empet
gen plant_wage = s_brut / empet

drop if empet==0 // means that there were only spells with missing start and end ; and duree is always missing in this case
	* Drop 280k
	
gen year = `t'
	
save "temp/PlantsPostes`t'", replace
}


* CZ-level data
forvalues t=2002(1)2007 {

* use datdeb datfin zempt s_brut using "/Users/adrien/Desktop/Perso/DADS/Postes/source/dads`t'" , clear
use "temp/PlantsPostes`t'", clear

merge m:1 comr 

* Collapse
gcollapse (sum) s_brut empet , by(zempt)

gen dw = s_brut / empet
gen year = `t'
	
save "temp/WagePopPostes`t'", replace	

}



* Merge all
clear
forvalues t=2002(1)2007 {
append using "temp/PlantsPostes`t'"
}
sort siren nic year
save "PlantsPostes0207", replace // About half with skill data

clear
forvalues t=2002(1)2007 {
append using "temp/WagePopPostes`t'"
}
save "WagePopPostes0207", replace

}

** Construct plant-level VA per worker
{

* merge ficus data
clear
forvalues y=1996(1)2007 { // 1996(1)2007 // start in 96 because before not all variables
	local Y = substr("`y'",3,2)
	append using "/Users/adrien/Desktop/Perso/Data/SUSE/stata/tempOutsourcing/ficus`Y'_01"
}
replace dep = substr(dep,1,2)
replace com = dep + com

keep siren year va sales inputs ebe emp wage ape YearCrea YearExit zemp com monor charsoc immo*
	* monor = 2 means all activity within same region
gsort siren year
bys siren year: keep if _n == 1 // remove 106 duplicates
save "FirmVA9607", replace


* construct sample of single-plant firms
	
* Merge with Plant data from Postes
use "PlantsPostes0207", clear
merge m:1 siren year using "FirmVA9607"
gegen MaxMerge = max(_m), by(siren)
tab MaxMerge
keep if MaxMerge == 3
drop MaxMerge
save "MultiplePlantVA9607_0", replace

* Keep only single-plant firms
gegen Nplant = nvals(siret), by(siren year)
*tab Nplant // 77% at 1
*tab Nplant if monor == "2"
keep if monor == "2"
gegen MaxNplant = max(Nplant), by(siren)
keep if MaxNplant == 1

* define average skill of the firm
gegen MeanSkill = mean( Skill ), by(siren)

count if MeanSkill != .
*  10,407,564 out of 12m

keep if MeanSkill != .
drop monor Nplant Skill MaxNplant
rename MeanSkill Skill 
keep if emp > 0
corr emp empet // 0.88
drop empet siret nic zempt

save "SinglePlantVA9607", replace

}
	
** Prepare mean EU rates by ze
{

use "dads_6" if E == 1, clear

gen w = exp(lq)
gcollapse (mean) EU lq (sum) E w, by(ze)
gen lE = log(E)
gen lw = log( w / E )

scatter lq lE
keep if lE > 5 // otherwise too much measurement error

rename lw lwageZeDADS
rename lq lwageZeDADS2
rename lE lempZeDADS

gsort EU

gen EUrankUW = _n
replace EUrankUW = EUrankUW / EUrankUW[_N]

gen EUrankW = sum(E)
replace EUrankW = EUrankW / EUrankW[_N]

rename E  EmpZePanel
rename EU EUZePanel

save "temp/DadsEUze9707", replace

}

** Single plant: merge with local wages and population, and identify entrants
{

* ape crosswalk
use "SinglePlantVA9607", clear

keep siren year ape emp
keep if inlist(year,2002,2003)
gsort siren year
bys siren: replace emp = emp[_n-1] if _n > 1

reshape wide ape, i(siren) j(year)
fcollapse (sum) emp, by(ape2002 ape2003)
keep if ape2002 != "" & ape2003 != ""

gsort ape2003
gegen SumEmp = sum(emp), by(ape2003)
gen ShareEmp = emp / SumEmp

sort ape2003 ShareEmp
bys ape2003: keep if _n == _N
drop emp ShareEmp SumEmp

save "temp/CrosswalkApe2003ToApe2002Ficus", replace


* merge in ape crosswalk
use "SinglePlantVA9607", clear
gegen minyear = min(year), by(siren)
gegen maxyear = max(year), by(siren)
drop _m

* use existing ape 2002 if firm in sample before 2002
gsort siren year
bys siren: replace ape = ape[_n-1] if minyear <= 2002

* replace ape with crosswalk for firms that enter after 2003
rename ape ape2003
merge m:1 ape2003 using "temp/CrosswalkApe2003ToApe2002Ficus" // 900k matched
replace ape2003 = ape2002 if minyear >= 2003 // 900k changes

rename ape2003 ape
drop ape2002 _m apet s_brut plant_wage

* remove firms with missing industry info
drop if ape == ""
drop if siren == ""

gsort siren year
tab minyear
tab maxyear
save "temp/SinglePlantVA9607_01", replace


* add ze info
use "temp/SinglePlantVA9607_01", clear

rename com comr
merge m:1 comr using "CrosswalkCommuneToZoneEmploi"
drop if _m == 2
gegen MaxZe = max(ze), by(siren)
replace ze = MaxZe
keep if ze != .

drop _m MaxZe YearExit
rename zemp zeOldDef

* compute ze-level wages and employment
gcollapse (sum) empZe = emp wageZe = wage, by(ze) merge

replace wageZe = wageZe / empZe

rename wages wagebill
gen wage = wagebill / emp

* Identify continuously open plants
egen Nyears = nvals(year), by(siren)
gen disco = Nyears != maxyear - minyear + 1

tab disco // 1.5m ones
drop if disco == 1
drop disco

* Entrants: year of entry and next year
gen Entry = year == minyear & minyear > 1996
gen Exit = year == maxyear & maxyear < 2007

* variables of interest
gen vapw = va / emp

* Logs
foreach var in empZe wageZe emp wage va vapw {
gen l`var' = log(`var')
}

* Industry
rename ape ape3
gen ape2 = substr(ape,1,2)

forvalues i=2(1)3 {
gegen gape`i' = group(ape`i')
	* Save missing industry
gegen MAXgape`i' = max(gape`i'), by(siren)
replace gape`i' = MAXgape`i'
drop MAXgape`i'
}

* Normalize
foreach var in lwageZe lempZe {
su `var' [w=emp], d
replace `var' = ( `var'-r(mean) ) / ( r(p75) - r(p25) )
}

* Growth rates
gsort siren year

foreach var in vapw va {
bys siren: gen dl`var' = l`var' - l`var'[_n-1] if _n > 1
bys siren: gen G`var'  = 2*(`var' - `var'[_n-1])/(`var' + `var'[_n-1]) if _n > 1
bys siren: replace G`var' =   2 if _n == 1 & Entry == 1
bys siren: replace G`var' = - 2 if _n == 1 & Exit  == 1
}

* Job destruction and job creation
gsort siren year

bys siren: gen JobCrea     = 2 * max( emp - emp[_n-1] , 0 ) / ( emp + emp[_n-1] )
bys siren: replace JobCrea = 2 if _n == 1 & Entry == 1

bys siren: gen JobDes = - 2 * min( emp - emp[_n-1] , 0 ) / ( emp + emp[_n-1] )
bys siren: replace JobDes = - 2 if _n == _N & Exit == 1

gen JobRealloc = JobCrea + JobDes

bys siren: gen JobWeight = ( emp + emp[_n-1] ) / 2 if _n > 1
bys siren: replace JobWeight = emp / 2 if _n == 1 & Entry == 1

save "temp/SinglePlantVA9607_02", replace

* keep only interior years for comparability
use "temp/SinglePlantVA9607_02", clear
drop if inlist(year,1996,2007)

* Merge with separation rate data
merge m:1 ze using "temp/DadsEUze9707", nogen keep(3)

save "Ficus9706", replace


}

** Multiple plant firms
{
	
use "MultiplePlantVA9607_0", clear
keep if year >= 2002 // 9.9m obs left

* define average skill of the estab
gegen MeanSkill = mean( Skill ), by(siret)

count if MeanSkill != .
keep if empet > 0
keep if siren != ""
keep if siret != "" // drop 1.4m

* Ape crosswalk
gegen minyear = min(year), by(siret)
gegen maxyear = max(year), by(siret)
drop _m

* use existing apet 2002 if firm in sample before 2002
gsort siret year
order siret year apet minyear maxyear

* remove firms with missing industry info
drop if apet == ""
drop if siren == ""
drop if siret == ""

gsort siret year

save "temp/MultiplePlantVA9607_1", replace


* add ze info
use "temp/MultiplePlantVA9607_1", clear

rename com comr
merge m:1 comr using "CrosswalkCommuneToZoneEmploi"
drop if _m == 2
gegen MaxZe = max(ze), by(siret)
replace ze = MaxZe
keep if ze != .

drop _m MaxZe
rename zempt zeOldDef

* compute ze-level wages and employment
gcollapse (sum) empZe = emp wageZe = wage, by(ze) merge

replace wageZe = wageZe / empZe

rename wages wagebill
gen wage = wagebill / emp

* Identify continuously open plants
egen Nyears = nvals(year), by(siret)
gen disco = Nyears != maxyear - minyear + 1

tab disco // 500k ones
drop if disco == 1
drop disco

* Entrants: year of entry and next year
gen Entry = year == minyear & minyear > 2002
gen Exit = year == maxyear & maxyear < 2007

* variables of interest
gegen SumSirenWagebill = sum(wagebill), by(siren year)
gen ShareWage = wagebill / SumSirenWagebill
gen vat = ShareWage * va

rename empet empt
egen empn = sum(empt), by(siren year)

drop if va == .

rename va van
gen vanpw = van / empn
gen vatpw = vat / empt

* Logs
foreach var in empZe wageZe emp wage van vat vanpw vatpw {
gen l`var' = log(`var')
}

* Industry
rename apet apet4
gen apet3 = substr(apet4,1,3)
gen apet2 = substr(apet4,1,2)

forvalues i=2(1)3 {
gegen gapet`i' = group(apet`i')
	* Save missing industry
gegen MAXgapet`i' = max(gapet`i'), by(siret)
replace gapet`i' = MAXgapet`i'
drop MAXgapet`i'
}

* Normalize
foreach var in lwageZe lempZe {
qui su `var' [w=emp], d
replace `var' = ( `var'-r(mean) ) / ( r(p75) - r(p25) )
}

* Growth rates
foreach g in n t {
	gsort sire`g' year

	foreach var in va`g'pw va`g' {
	bys sire`g': gen dl`var' = l`var' - l`var'[_n-1] if _n > 1
	bys sire`g': gen G`var'  = 2*(`var' - `var'[_n-1])/(`var' + `var'[_n-1]) if _n > 1
	bys sire`g': replace G`var' =   2 if _n == 1 & Entry == 1
	bys sire`g': replace G`var' = - 2 if _n == 1 & Exit  == 1
	}
}

* Job destruction and job creation
foreach g in t {

	gsort sire`g' year

	bys sire`g': gen JobCrea`g'     = 2 * max( emp`g' - emp`g'[_n-1] , 0 ) / ( emp`g' + emp`g'[_n-1] )
	bys sire`g': replace JobCrea`g' = 2 if _n == 1 & Entry == 1

	bys sire`g': gen JobDes`g' = - 2 * min( emp`g' - emp`g'[_n-1] , 0 ) / ( emp`g' + emp`g'[_n-1] )
	bys sire`g': replace JobDes`g' = - 2 if _n == _N & Exit == 1

	gen JobRealloc`g' = JobCrea`g' + JobDes`g'

	bys sire`g': gen JobWeight`g' = ( emp`g' + emp`g'[_n-1] ) / 2 if _n > 1
	bys sire`g': replace JobWeight`g' = emp`g' / 2 if _n == 1 & Entry == 1
}

* Merge with separation rate data
merge m:1 ze using "temp/DadsEUze9707", nogen keep(3)

save "MultiplePlantVA0207", replace

}


}

** Crosswalk between new and old ze
{

use "Ficus9706", clear

keep siren year ze zeOld emp
gcollapse (mean) emp, by(siren ze zeOld)
gcollapse (sum) emp, by(ze zeOld)

gsort zeOld
gegen SumEmp = sum(emp), by(zeOld)
gen ShareEmp = emp / SumEmp
su ShareEmp, d // quite concentrated too

gsort zeOld ShareEmp
bys zeOld: keep if _n == _N
drop emp SumEmp ShareEmp

save "CrosswalkOldZeToNewZe", replace


}		
		
*** Compare summary statistics between EEC and DADS
{

global VarComp U labforce EU EN UE NE UN NU
use id year $VarComp using "dads_6" if year >= 2003 , clear
fcollapse (mean) $VarComp
save "temp/SummaryStatDADS", replace


use "EEC_03" , clear
rename unemp U 
keep if NoRate == 0
keep if year >= 2003

collapse (mean) $VarComp [ w = wt ]
gen n = 1
save "temp/SummaryStatEEC", replace

use "temp/SummaryStatDADS", clear
foreach var in $VarComp {
rename `var' `var'2
}

gen n = 1
merge 1:1 n using "temp/SummaryStatEEC", nogen keep(3)

foreach var in $VarComp {
rename `var' `var'1
}

reshape long $VarComp , i(n) j(dataset)

gen source = "DADS" if dataset == 2
replace source = "EEC" if dataset == 1
drop dataset n

gen ssU = EU / ( EU + UE )

order source U ssU labforce EU EN UE NE UN NU

tabstat U ssU labforce EU EN UE NE UN NU , stat(mean) by(source) format(%9.3fc)

/*

Compare 2003-2007 in both samples

source |         U       ssU  labforce        EU        EN        UE        NE        UN        NU
-------+------------------------------------------------------------------------------------------
  DADS |     0.100     0.109     0.931     0.021     0.011     0.173     0.166     0.131     0.139
   EEC |     0.071     0.055     0.903     0.015     0.008     0.261     0.052     0.127     0.089
-------+------------------------------------------------------------------------------------------
 Total |     0.086     0.082     0.917     0.018     0.010     0.217     0.109     0.129     0.114
--------------------------------------------------------------------------------------------------

*/

label variable U "Unemployment rate"
label variable ssU "Implied unemp. rate from losing and finding"
label variable labforce "Participation rate"
label variable EU "E-to-U probability"
label variable UE "U-to-E probability"

replace source = "LFS" if source == "EEC"

eststo clear
* eststo TABLE : estpost tabstat U ssU labforce EU UE , by(source) stat(mean)  // format(%9.3fc)
estpost tabstat U ssU labforce EU UE , ///
	by(source) statistics(mean) columns(statistics) listwise ///
	nototal  // format(%9.3fc)
esttab using "$tables/SummaryStas052821.tex" , label main(mean) nostar unstack noobs nonote nonumber ///
		title(Summary statistics) booktabs replace ///
		cells("mean(fmt(3))") nomtitles
	

}
		
}
	
						
						
						
					**********************************************
					*** DATA CONSTRUCTION : FRANCE: VACANCIES ****
					**********************************************
	
	
{

*** Get New CZ by siret from DADS postes
{

use "PlantsPostes0207", clear
keep siret zempt
gsort siret zempt
bys siret zempt: keep if _n == 1
bys siret: gen N = _N
tab N


keep if N == 1
drop N

rename zempt zeOldDef
tostring zeOld, replace force

merge m:1 zeOldDef using "CrosswalkOldZeToNewZe", nogen keep(1 3)
keep if ze != .

save "temp/SiretCZ", replace

}

*** Clean vacancy datasets
{

global varlistacemo siret efftot efftot1 diffrec ///
					 empvac nbvac c_apetd
global acemopath /Users/adrien/Desktop/Data/ACEMO DATA/Pro2002-2007(ChristineChambaz)/stata
					 
forvalues y=2003(1)2007 {
	forvalues q=1(1)4 {
	
		use "$acemopath/pro`y'`q'", clear
		rename *, lower
		keep $varlistacemo
		order $varlistacemo
		
		rename efftot emp
		rename efftot1 emp1
		rename diffrec DifficultyRecruit
		rename empvac HasVacancies
		rename nbvac Vacancies
		rename c_apetd apet
		
		gen year = `y'
		gen quarter = `q'
		
		save "$root2/source/acemo/CleanPro`y'`q'", replace
			
	}
}

}

*** Merge all years and quarters, and add CZ data
{

* Merge all acemo datasets
clear 
forvalues y=2003(1)2007 {
	forvalues q=1(1)4 {
		
		append using "$root2/source/acemo/CleanPro`y'`q'"
	
	}
}
foreach var in DifficultyRecruit HasVacancies {
	gen `var'0 = 1 if `var' == "O"
	replace `var'0 = 0 if `var' == "N"
	replace `var'0 = . if `var' == ""
	drop `var'
	rename `var'0 `var'
}
order siret apet year quarter emp emp1 Vacancies HasVacancies Diff
sort siret year quarter

* Basic cleaning
keep if inlist(substr(siret,1,1),"0","1","2","3","4") == 1 | ///
		inlist(substr(siret,1,1),"5","6","7","8","9") == 1

drop if Vacancies == . 
count if HasVacancies == 0 & Vacancies > 0	
br if HasVacancies == 0 & Vacancies > 0		
		
drop if HasVacancies == 0 & Vacancies > 0	
drop HasVacancies

* Add CZ
merge m:1 siret using "temp/SiretCZ"
keep if _m == 3
drop _m

* EU rates and quantile from panel
merge m:1 ze using "temp/DadsEUze9707"
keep if _m == 3
drop _m

* Save		
count // 507,755
	
save "Acemo0307", replace

}


}					


						
						*******************************
						*** DATA CONSTRUCTION : US ****
						*******************************

						
{

*** CPS data
	
* Import and basic selection
{

** Import csv and save to stata format
use "$root2/source/cps/cps", clear

* Restrict years to make computations lighter for now
keep if year >= 1996 //  & year <= 2007

* Remove group quarters
drop if gq != 1 & gq != 2 & gq != .

* Keep only white males prime aged: 25-54
	* Males
keep if sex == 1
	* Whites
keep if race == 100
	* Age
keep if age >= 25 & age <= 54
	* Household heads
keep if relate==101

drop sex race race relate gq
drop occly ind90ly uhrsworkly firmsize jtyears jtyearago jtsame 
drop county

* Save 
save "cps_01", replace

}

* More cleaning
{

use "cps_01", clear

* Compute education bins
gen educ2 	  = 1 if educ <= 70 				// High school dropout
replace educ2 = 2 if educ == 73 				// High school graduate
replace educ2 = 3 if inlist(educ,81,91,92)		// Occupational degree or some college
replace educ2 = 4 if educ >= 111 				// College graduate or more

drop educ
rename educ2 educ

* Work type
gen 	WorkType = 1 if floor(class/10) == 1 		// Self-employed
replace WorkType = 2 if floor(class/20) == 2 		// Wage worker

drop class

* Labor force status
gen unemp     = 1 if floor(empstat/10) == 2
replace unemp = 0 if floor(empstat/10) == 1
replace unemp = . if floor(empstat/10) == 3 | empstat == 0

* Checks
su unemp if labforce == 1 // no obs, so ok
su unemp if labforce == 2 // 900k obs, mean = 0.03
su unemp if labforce == 0 // 900k obs, mean = 0.03

* Drop if no known labor force status
drop if empstat == 0 | labforce == 0

* Recode labforce
gen labforce2 	  = 1 if labforce == 2 			// In the labor force
replace labforce2 = 0 if labforce == 1			// Out of the labor force
drop labforce
rename labforce2 labforce

* Age bins
gen agebin = 5 * floor(age/5)

* Replace key variables in non-asec and then drop asec
sort cpsidp mish asecflag
order cpsidp mish asec
foreach var in hhincome wkswork1 ftotval inctot incwage {
bys cpsidp: replace `var' = `var'[_n-1] if asec == 2 & asec[_n-1] == 1
}
bys cpsidp: replace wtfinl = wtfinl[_n+1] if asec == 1  & asec[_n+1] == 2

drop if asec == 2 // drop 100k
drop if wtfinl == . // drop 65k


* Mincer regression for wages
gen lw = log(incwage) 							// Annual wage income

reghdfe lw [w = wtfinl], a(year educ agebin occ ind1990 metarea statefip, savefe)

gen skill = __hdfe2__ + __hdfe3__ + __hdfe4__ // 1m missing
drop __hdfe*
egen skill2 = mean(skill), by(cpsidp) // 800k missing
drop skill
rename skill2 skill

save "cps_02", replace

}
	
* Keep only individuals with variables filled in and in the labor force
{
use "cps_02", clear
rename cpsidp id

* Education
egen educ2 = max(educ), by(id)
drop educ
rename educ2 educ
drop if educ == . // dropped 8k

* Skill
su skill

* Number of months in survey
egen Nmonths = nvals(mish), by(id)
tab Nmonths

* Construct transition rates
drop if id == 0 // drop 50k
gsort id mish
order id mish year
duplicates drop id mish, force // drop 70k
gsort id mish

gegen MinMish = min(mish), by(id)
gegen MaxMish = max(mish), by(id)

keep if MinMish == 1 // drop 500k

* UE rate
sort id mish
gen UE = .
bys id: replace UE = 1 if unemp == 1 & unemp[_n+1] == 0 & labforce == 1 & mish !=4 & _n < _N
bys id: replace UE = 0 if unemp == 1 & unemp[_n+1] == 1 & labforce == 1 & mish !=4 & _n < _N

* UN rate
sort id mish
gen UN = .
bys id: replace UN = 1 if unemp == 1 & labforce == 1 & labforce[_n+1] == 0 & mish !=4 & _n < _N
bys id: replace UN = 0 if unemp == 1 & labforce == 1 & labforce[_n+1] == 1 & mish !=4 & _n < _N

* EU rate
sort id mish
gen EU = .
bys id: replace EU = 1 if unemp == 0 & unemp[_n+1] == 1 & labforce == 1 & mish !=4 & _n < _N
bys id: replace EU = 0 if unemp == 0 & unemp[_n+1] == 0 & labforce == 1 & mish !=4 & _n < _N

* EN rate
sort id mish
gen EN = .
bys id: replace EN = 1 if unemp == 0 & labforce == 1 & labforce[_n+1] == 0 & mish !=4 & _n < _N
bys id: replace EN = 0 if unemp == 0 & labforce == 1 & labforce[_n+1] == 1 & mish !=4 & _n < _N

* NU rate
sort id mish
gen NU = .
bys id: replace NU = 1 if unemp == . & labforce == 0 & unemp[_n+1] == 1 & mish !=4 & _n < _N
bys id: replace NU = 0 if unemp == . & labforce == 0 & unemp[_n+1] == . & mish !=4 & _n < _N

* NE rate
sort id mish
gen NE = .
bys id: replace NE = 1 if unemp == . & labforce == 0 & unemp[_n+1] == 0 & mish !=4 & _n < _N
bys id: replace NE = 0 if unemp == . & labforce == 0 & unemp[_n+1] == . & mish !=4 & _n < _N

* Save
order id mish unemp labforce UE EU EN NE EN NU
save "cps_05", replace
}

* Create skill groups and save dataset with skills
{
use "cps_05", clear

preserve

sort id mish
bys id: keep if _n == 1
egen DistinctCities = nvals(metarea)
egen DistinctCities2 = max(DistinctCities)
local n = DistinctCities2[1]
drop Distinct*
xtile skillbin = skill [ w = wt ], nq(`n') // as many as metareas
drop skill
rename skillbin skill
keep id skill
save "temp/skillsCPS", replace

restore

drop skill
merge m:1 id using "temp/skillsCPS", nogen keep(1 3)

count // 1,914,124
count if skill != . // 558,345

save "cps_06", replace

}

}



							****************
							*** EMPIRICS ***
							****************

							
{

*** DADS U-rate vs. EEC by city
{

** U-rate comparison
{

use U labforce EU UE ze year age using "dads_6" if year >= 2003 , clear
gen pop = 1

fcollapse (mean) U labforce EU UE (sum) pop , by(ze)

rename ze City

foreach var in U labforce EU UE pop {
rename `var' `var'_DADS
}

merge 1:1 City using "$root/source/CityMeans"
keep if _m == 3
drop _m

* Important
drop if City == .

rename NumberObs pop
rename unemp U

foreach var in U labforce EU UE pop {
rename `var' `var'_EEC
}

save "temp/CityMeansDadsEec", replace



use "temp/CityMeansDadsEec", clear
{

* Re-center
qui su U_DADS
local meanU_DADS = r(mean)
qui su U_EE
local meanU_EEC = r(mean)
replace U_DADS = U_DADS + `meanU_EEC' - `meanU_DADS'


* Select sample
su U_DADS, d
global Cut = 0.15

foreach var in U_EEC U_DADS {
replace `var' = . if `var' > $Cut
}


* Regression
reg U_EEC U_DADS

global R2 : di %3.2f e(r2)
global slope : di %3.2f _b[ U_DADS ]
global cons : di %3.2f _b[ _cons ]
global se : di %3.2f _se[ U_DADS ]

qui test U_DADS == 1
global pvalue : di %3.2f r(p)

gen lU_EEC = log(U_EEC)
gen lU_DADS = log(U_DADS)

reg lU_EEC lU_DADS

gen predict = floor( U_DADS * 10 ) / 10
sort predict
bys predict: replace predict = . if _n > 1
foreach var in predict U_DADS {
egen max`var' = max(`var')
egen min`var' = min(`var')
}
replace predict = 1 * maxU_DADS if predict == maxpredict
replace predict = 1 * minU_DADS if predict == minpredict
gen fit = $cons + $slope * predict
}

* Graph: U-rate
local name UrateEECvsDADS053021
scatter U_EEC U_DADS [ w = pop_EEC ] if U_EEC < $Cut & U_DADS < $Cut & U_EEC > 0  , msymbol(circle_hollow) mcolor("$MyBlue") msize(0.75) || ///
function y = x, range(0.0 0.15) clwidth(0.5) lcolor("$MyOrange") || ///
function y = $cons + $slope * x, range(0.0 0.15) clwidth(0.5) lcolor("$MyOrange") lpattern(dash) ///
	 xsize(6) ysize(7) ///
	 xlabel(,labsize(small) nogrid ) ylabel(,labsize(small) nogrid ) ///
	 xscale(range(0 0.15) titlegap(3) ) yscale(range(0 0.15) titlegap(3) ) ///
	 xlabel(0(0.05)0.15) ylabel(0(0.05)0.15) ///
	 xtitle("DADS unemployment rate",size(small)) ytitle("LFS unemployment rate",size(small)) ///
	 legend(on cols(1) order(1 2 3) size(small) 	///
			   label(1 "Data") ///
			   label(2 "45 degree line") ///
			   label(3 "Regression line: LFS = $cons + $slope ($se) * DADS; R2 = $R2") ///
			   region(lcolor(white)) ) ///
	 graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	 name(`name', replace) 
graph export "${graphs}/`name'.pdf", name(`name') replace

}

**  U vs N in the cross section of cities
{

foreach data in EEC DADS {

	use "temp/CityMeansDadsEec", clear

	gen out_`data' = 1 - labforce_`data'
	gen sample = U_`data' < 0.15 & out_`data' < 0.15 & U_`data' > 0.03 & out_`data' > 0.03 & pop_`data' >= 100
	reg out_`data' U_`data' [w=pop_`data'] if sample == 1

	global R2 : di %3.2f e(r2)
	global slope : di %3.2f _b[ U_`data' ]
	global cons : di %3.2f _b[ _cons ]
	global se : di %3.2f _se[ U_`data' ]

	local name UvsNonPart`data'053021
	scatter out_`data' U_`data' [w=pop_`data'] if sample == 1 , msymbol(circle_hollow) mlcolor("$MyBlue") msize(0.75) ///
		|| function y = x , range(0.03 0.15) lcolor("$MyOrange") lwidth(0.5) ///
		|| function y = $cons + $slope * x , range(0.03 0.15) lcolor("$MyOrange") lwidth(0.5) lpattern(dash) ///
		, xsize(12) ysize(14) ///
		xscale(range(0.03 0.15) titlegap(3) ) yscale(range(0.03 0.15) titlegap(3) ) ///
		xlabel(0.05(0.05)0.15 , labsize(small) nogrid) ///
		ylabel(0.05(0.05)0.15 , labsize(small) nogrid) ///
		xtitle("Unemployment rate" , size(small) ) ///
		ytitle("Non-participation rate" , size(small) ) ///
		graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
		legend(on cols(1) order(1 2 3) size(small) 	///
				   label(1 "Data") ///
				   label(2 "45 degree line") ///
				   label(3 "Regression line: Non-part. = $cons + $slope ($se) * U; R2 = $R2") ///
				   region(lcolor(white)) ) ///
		name(`name', replace)
	graph export "${graphs}/`name'.pdf", name(`name') replace	
	
}


}
	
}
	
*** Persistence + changes over time
{

** Aggregate data for France
{

* Location unemployment
use "dads_6" if year >= 1997 & ze != . , clear
gen pop = 1
fcollapse (mean) U EU UE (sum) pop , by(ze year)
rename ze City
save "temp/AgZeYearFrance", replace

* Predicted change based on industry trends
use "dads_6" if year >= 1997 & ze != . , clear
gen yearbin = year >= 2002
rename ze City
rename gapet2 Ind
fcollapse (sum) E , by(City Ind yearbin)
save "temp/EmploymentCityIndTwoPeriodsFrance", replace


use "temp/EmploymentCityIndTwoPeriodsFrance", clear
reshape wide E , i(City Ind) j(yearbin)
forvalues i=0(1)1 {
replace E`i' = 0 if E`i' == .
egen sE`i' = sum(E`i')
replace E`i' = E`i' / sE`i'
drop sE`i'
egen E`i'Ind = sum(E`i'), by(Ind)
egen E`i'City = sum(E`i'), by(City)
}

gen w0 = E0/E0City
gen dEInd = 2 * ( E1Ind - E0Ind ) / ( E1Ind + E0Ind )

* Winsorize because extreme values driven by small industries
qui su dEInd, d
replace dEInd = r(p5) if dEInd < r(p5)
replace dEInd = r(p95) if dEInd > r(p95)
	* growth rate is between -32% and 24%

egen dECityPred = sum( w0 * dEInd ) , by(City)
	* between -6.6% and 2.7%

keep City dECityPred E0	
fcollapse (first) dE (sum) emp = E0 , by(City)

save "temp/CityEmpGrowthPredFrance", replace

}

** Aggregate data for US : CPS
{

use "cps_05", clear
keep if metarea != .
rename met City
gen pop = 1
keep if year >= 1996 & year <= 2007
gen yearbin = year >= 2002

* Drop months where we cannot calculate rates
keep if mish != 4
sort id mish
bys id: keep if _n < _N

* Drop when City is missing
drop if floor(City/10) == 999

* Adjust weights monthly
egen Swt = sum( wt ) , by( year month)
replace wt = wt / Swt
drop Swt

* Weight observations: number of available obs varies by variable (U, UE, EU); affects the denominator of the weights
foreach var in EU UE unemp {
di "`var'"
gen wt`var' = wtfinl * ( `var' != . )
egen swt`var' = sum ( wt`var' ) , by( City year )
replace wt`var' = wt`var' / swt`var'
replace `var' = `var' * wt`var'
}

* Average by City
fcollapse (sum) pop unemp UE EU , by( City year )

rename unemp U

save "temp/AgZeYearUScps", replace







* Predicted change based on industry trends

use "cps_05", clear
keep if metarea != .
rename met City
gen pop = 1
keep if year >= 1996 & year <= 2007
gen yearbin = year >= 2002

* Drop months where we cannot calculate rates
keep if mish != 4
sort id mish
bys id: keep if _n < _N

* Drop when City is missing
drop if floor(City/10) == 999

* Adjust weights monthly
egen Swt = sum( wt ) , by( year month)
replace wt = wt / Swt
drop Swt

keep if unemp == 0
gen E = 1

* tostring ind1990, replace force
gen ind = string(ind1990,"%03.0f")
gen Ind = substr(ind,1,2)
destring Ind, replace force

fcollapse (sum) E , by(City Ind yearbin)

save "temp/EmploymentCityIndTwoPeriodsUScps", replace



use "temp/EmploymentCityIndTwoPeriodsUScps", clear

reshape wide E , i(City Ind) j(yearbin)

forvalues i=0(1)1 {

replace E`i' = 0 if E`i' == .

egen sE`i' = sum(E`i')
replace E`i' = E`i' / sE`i'
drop sE`i'

egen E`i'Ind = sum(E`i'), by(Ind)
egen E`i'City = sum(E`i'), by(City)

}

gen w0 = E0/E0City
gen dEInd = 2 * ( E1Ind - E0Ind ) / ( E1Ind + E0Ind )

* Winsorize because extreme values driven by tiny industries
qui su dEInd, d
replace dEInd = r(p5) if dEInd < r(p5)
replace dEInd = r(p95) if dEInd > r(p95)
	* Now growth rate is between -42% and 81%

egen dECityPred = sum( w0 * dEInd ) , by(City)
	* between -11% and 5%

keep City dECityPred	
fcollapse (first) dE , by(City)

save "temp/CityEmpGrowthPredUScps", replace




* Predicted change based on skill trends

use "cps_05", clear
keep if metarea != .
rename met City
gen pop = 1
keep if year >= 1996 & year <= 2007
gen yearbin = year >= 2002

rename educ Skill

* Drop months where we cannot calculate rates
keep if mish != 4
sort id mish
bys id: keep if _n < _N

* Drop when City is missing
drop if floor(City/10) == 999

* Adjust weights monthly
egen Swt = sum( wt ) , by( year month)
replace wt = wt / Swt
drop Swt

keep if unemp == 0
gen E = 1


fcollapse (sum) E , by(City Skill yearbin)

save "temp/EmploymentCitySkillTwoPeriodsUScps", replace



use "temp/EmploymentCitySkillTwoPeriodsUScps", clear

reshape wide E , i(City Skill) j(yearbin)

forvalues i=0(1)1 {

replace E`i' = 0 if E`i' == .

egen sE`i' = sum(E`i')
replace E`i' = E`i' / sE`i'
drop sE`i'

egen E`i'Skill = sum(E`i'), by(Skill)
egen E`i'City = sum(E`i'), by(City)

}

gen w0 = E0/E0City
gen dESkill = 2 * ( E1Skill - E0Skill ) / ( E1Skill + E0Skill )

* Winsorize because extreme values driven by tiny industries
qui su dESkill, d
replace dESkill = r(p5) if dESkill < r(p5)
replace dESkill = r(p95) if dESkill > r(p95)

egen dECityPredSkill = sum( w0 * dESkill ) , by(City)

keep City dECityPredSkill	
fcollapse (first) dE , by(City)

save "temp/CityEmpGrowthPredSkillUScps", replace


}

** Aggregate data for US : Census
{

use "temp/census_01", clear

rename met City
gen pop = 1
drop if year == 2007
gen yearbin = year >= 2000

* Adjust weights annually
egen Swt = sum( wt ) , by( year )
replace wt = wt / Swt
drop Swt

* Weight observations: number of available obs varies by variable (U, UE, EU); affects the denominator of the weights
foreach var in U {
di "`var'"
gen wt`var' = wt * ( `var' != . )
egen swt`var' = sum ( wt`var' ) , by( City year )
replace wt`var' = wt`var' / swt`var'
replace `var' = `var' * wt`var'
}

* Average by City
fcollapse (sum) pop U , by( City year )

save "temp/AgZeYearUScensus", replace




* Predicted change based on industry trends

use "temp/census_01", clear
rename met City
keep if City != .
gen pop = 1
drop if year == 2007
gen yearbin = year >= 2000

* Adjust weights annually
egen Swt = sum( wt ) , by( year )
replace wt = wt / Swt
drop Swt

keep if U == 0
gen E = 1

* tostring ind1990, replace force
gen ind = string(ind1990,"%03.0f")
gen Ind = substr(ind,1,2)
destring Ind, replace force

fcollapse (sum) E , by(City Ind yearbin)

save "temp/EmploymentCityIndTwoPeriodsUScensus", replace



use "temp/EmploymentCityIndTwoPeriodsUScensus", clear

reshape wide E , i(City Ind) j(yearbin)

forvalues i=0(1)1 {

replace E`i' = 0 if E`i' == .

egen sE`i' = sum(E`i')
replace E`i' = E`i' / sE`i'
drop sE`i'

egen E`i'Ind = sum(E`i'), by(Ind)
egen E`i'City = sum(E`i'), by(City)

}

gen w0 = E0/E0City
gen dEInd = 2 * ( E1Ind - E0Ind ) / ( E1Ind + E0Ind )

* Winsorize because extreme values driven by tiny industries
qui su dEInd, d
replace dEInd = r(p5) if dEInd < r(p5)
replace dEInd = r(p95) if dEInd > r(p95)
	* Now growth rate is between -42% and 81%

egen dECityPred = sum( w0 * dEInd ) , by(City)
	* between -11% and 5%

keep City dECityPred
fcollapse (first) dE , by(City)

save "temp/CityEmpGrowthPredUScensus", replace






* Predicted change based on skill

use "temp/census_01", clear
rename met City
keep if City != .
gen pop = 1
drop if year == 2007
gen yearbin = year >= 2000

gen Skill = educ >= 80

* Adjust weights annually
egen Swt = sum( wt ) , by( year )
replace wt = wt / Swt
drop Swt

keep if U == 0
gen E = 1

fcollapse (sum) E , by(City Skill yearbin)

save "temp/EmploymentCitySkillTwoPeriodsUScensus", replace



use "temp/EmploymentCitySkillTwoPeriodsUScensus", clear

reshape wide E , i(City Skill) j(yearbin)

forvalues i=0(1)1 {

replace E`i' = 0 if E`i' == .

egen sE`i' = sum(E`i')
replace E`i' = E`i' / sE`i'
drop sE`i'

egen E`i'Skill = sum(E`i'), by(Skill)
egen E`i'City = sum(E`i'), by(City)

}

gen w0 = E0/E0City
gen dESkill = 2 * ( E1Skill - E0Skill ) / ( E1Skill + E0Skill )

* Winsorize because extreme values driven by tiny industries
qui su dESkill, d
replace dESkill = r(p5) if dESkill < r(p5)
replace dESkill = r(p95) if dESkill > r(p95)
	* Now growth rate is between -42% and 81%

egen dECityPredSkill = sum( w0 * dESkill ) , by(City)
	* between -8% and 10%

keep City dECityPredSkill	
fcollapse (first) dE , by(City)

save "temp/CityEmpGrowthPredSkillUScensus", replace



}

** Full Scatterplots
{

local Weight Pop // 
local Correction Lin // 
local cutCPS = 50
local cutCensus = 500
local cutFrance = 500 

foreach sample in France UScps UScensus { // 

use "temp/AgZeYear`sample'", clear

if "`sample'" != "UScensus" {
gen yearbin = year >= 2002
}
if "`sample'" == "UScensus" {
gen yearbin = year >= 2000
}

fcollapse (mean) U pop , by ( City yearbin)

if "`Weight'" == "Pop" {
gen W = pop
}
if "`Weight'" == "Unif" {
gen W = 1
}

gen lU = log(U)

* Merge in with industry growth rates
merge m:1 City using "temp/CityEmpGrowthPred`sample'", nogen keep(3)

if "`sample'" == "UScps" {
drop if pop < `cutCPS'
}

if "`sample'" == "UScensus" {
drop if pop < `cutCensus'
}

if "`sample'" == "France" {
drop if pop < `cutFrance'
}

* Create panel
sort City yearbin
xtset City yearbin

* Get elasticity of U to employment changes
gen dU = U - l.U
gen dlU = lU - l.lU

* egen Sample = max( U < 0.2 & l.U < 0.2 ) , by(City)

if "`Correction'" == "Log" {
qui reg dlU dECityPred [ w = W ] if yearbin == 1

predict lUfit
qui su lUfit [ w = W ]
replace lUfit = lUfit - r(mean)

gen lUind = lU
replace lUind = lU - lUfit if yearbin == 1
gen Uind = exp(lUind)
drop lU*

}

if "`Correction'" == "Lin" {
qui reg dU dECityPred [ w = W ] if yearbin == 1

predict Ufit
qui su Ufit [ w = W ]
replace Ufit = Ufit - r(mean)

gen Uind = U
replace Uind = U - Ufit if yearbin == 1

}

rename U Uraw


* Adjust for the change in average unemployment rate
foreach suf in raw ind {

egen mU`suf' = sum( U`suf' * W ) , by(yearbin)
egen spop = sum(W) , by(yearbin)
replace mU`suf' = mU`suf' / spop
drop spop

egen mU`suf'Init = max( mU`suf' * ( yearbin == 0 ) )
replace U`suf' = U`suf' - mU`suf' + mU`suf'Init

* Autocorrelation
qui reg U`suf' l.U`suf' // [w = W]
local rho`sample'`suf' =  _b[l.U`suf']
local rho`sample'`suf' : di %3.2f `rho`sample'`suf''

gen lU`suf' = log(U`suf')
qui  reg lU`suf' l.lU`suf' // [w = W]
local rho`sample'Log`suf' = _b[l.lU`suf']
local rho`sample'Log`suf' : di %3.2f `rho`sample'Log`suf''


* Trim data for display and autocorrelation
qui su U`suf', d
replace U`suf' = . if U`suf' < r(p1) | U`suf' > r(p99)

di "test"


* Graph
if inlist("`sample'","France") == 1 {
	local msize = 1
	local xlow = 0.02
	local xhigh = 0.15
	local xlablow = 0.025
	local labdiff = 0.025
}

if inlist("`sample'","UScps","UScensus") == 1 {
	local msize = 0.5
	local xlow = 0
	local xhigh = 0.08
	local xlablow = 0
	local labdiff = 0.02
}

local name PersUrate`sample'`suf'053121
tw scatter U`suf' l.U`suf' [ w = l.pop ] if U`suf' > 0 & l.U`suf' > 0 , msymbol(circle_hollow) mcolor("$MyBlue") msize(`msize') || ///
	 function y = x, range(`xlow' `xhigh') clwidth(0.5) lcolor("$MyOrange") ///
	 xsize(6) ysize(6) /// 
	 xlabel(,labsize(small) nogrid ) ylabel(,labsize(small) nogrid ) ///
	 xscale(range(`xlow' `xhigh') titlegap(3) ) ///
	 yscale(range(`xlow' `xhigh') titlegap(3) ) ///
	 xlabel(`xlablow'(`labdiff')`xhigh') ylabel(`xlablow'(`labdiff')`xhigh') ///
	 xtitle("Unemployment rate, 1997-2001",size(small)) ytitle("Unemployment rate, 2002-2007",size(small)) ///
	 legend(off order(1 2)  size(small) ///
			label(1 "Data") ///
			label(2 "45 degree line") region(lcolor(white)) ) ///
	 graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	 note(45d. Data: `suf'. Autocorr. of level = `rho`sample'`suf'' ; Autocorr. of log = `rho`sample'Log`suf'' ) ///
	 name(`name', replace) 
local path $graphs 
graph export "${graphs}/`name'.pdf", name(`name') replace


}

}

}

** Changes over time
{

use "temp/AgZeYearFrance", clear

gen yearbin = year >= 2002
gen UmU = U / ( 1 - U )

foreach var in EU UE UmU {
	gen l`var' = log(`var')
}

foreach var in U pop {
	egen `var'mean = mean(`var'), by(City)
	sort Umean year
}
keep if popmean > 1000 & Umean < 0.25

xtile CityBin = Umean [fw = pop], nq(10)

gcollapse (mean) lEU lUE lUmU, by(CityBin yearbin)

sort City yearbin
xtset City yearbin

gen mlUE = - lUE

gen lUc = lEU + mlUE

foreach var in lEU mlUE lUE lUmU lUc {
	
	gen d`var' = d.`var'
	qui su d`var'
	replace d`var' = d`var' - `r(mean)'
}

corr dlUc dlEU dmlUE, cov
reg dlEU dlUc // 44%
reg dmlU dlUc // 56%


* EU rate
local name FranceEUchange
scatter dlEU dlUc , msymbol(circle_hollow) mlcolor("$MyBlue") msize(4) /// 
	|| function y = x , range(-0.1 0.1) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.1 0.1) titlegap(3) ) yscale(range(-0.1 -0.1) titlegap(3) ) ///
	xlabel(-0.1(0.05)0.1 , labsize(small) nogrid) ///
	ylabel(-0.1(0.05)0.1 , labsize(small) nogrid) ///
	xtitle("change in predicted log unemployment / employment" , size(small) ) ///
	ytitle("change in log job losing rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(off order(2 1) ///
		  label(2 "France") ///
		  label(1 "United States") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace


* UE rate
local name FranceUEchange
scatter dmlUE dlUc , msymbol(circle_hollow) mlcolor("$MyBlue") msize(4) /// 
	|| function y = x , range(-0.1 0.1) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.1 0.1) titlegap(3) ) yscale(range(-0.1 -0.1) titlegap(3) ) ///
	xlabel(-0.1(0.05)0.1 , labsize(small) nogrid) ///
	ylabel(-0.1(0.05)0.1 , labsize(small) nogrid) ///
	xtitle("change in predicted log unemployment / employment" , size(small) ) ///
	ytitle("change in - log job losing rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(off order(2 1) ///
		  label(2 "France") ///
		  label(1 "United States") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace




	
}

}
	
*** Variance of unemployment onto EU and UE and participation
{

* PREPARE DATA FOR US
{

use "cps_05", clear
keep if year >= 1996 & year <= 2007

* Drop months where we cannot calculate rates
keep if mish != 4
sort id mish
bys id: keep if _n < _N

* Keep track of number of observations
gen pop = 1

* Some renaming
rename unemp U
rename wtfinl wt
rename met City

* Drop when City is missing
drop if floor(City/10) == 999

* Adjust weights monthly
egen Swt = sum( wt ) , by( year month)
replace wt = wt / Swt
drop Swt

local varliste U labforce UE EU EN NE NU UN

* Weight observations: number of available obs varies by variable (U, UE, EU); affects the denominator of the weights
foreach var in `varliste' {
gen wt`var' = wt * ( `var' != . )
egen swt`var' = sum ( wt`var' ) , by( City )
replace wt`var' = wt`var' / swt`var'
replace `var' = `var' * wt`var'
}

* Average by City
fcollapse (sum) pop `varliste' , by( City )

su pop , d

* Winsor correction when rate is 0
foreach var in `varliste' {
gen winsor`var' = 0
qui su `var' if `var' > 0 [w = pop] , d
replace winsor`var' = `var' < r(p1)
replace `var' = r(p1) if winsor`var' == 1
}
gen winsor = winsorU + winsorEU + winsorUE

foreach var in EU UE U {
gen l`var' = log(`var')
}
gen mlUE = - lUE

save "temp/USUrate", replace

}

* PREPARE DATA FOR FRANCE
{

** LFS: extract
{
use unemp UE EU NE NU UN EN labforce Num City using "$root/source/CityMeans", clear
drop if City == . // important for merge later on
gen EE = . // for compatibility with code below
rename unemp U
rename Num pop
keep if pop >= 100

* Winsor correction when rate is 0
foreach var in U labforce UE EU EN NE NU UN EE {
gen winsor`var' = 0
qui su `var' if `var' > 0 [ w = pop ] , d
replace winsor`var' = `var' < r(p1)
replace `var' = r(p1) if winsor`var' == 1
}
gen winsor = winsorU + winsorEU + winsorUE

foreach var in EU UE U EE {
gen l`var' = log(`var')
}
gen mlUE = - lUE

save "temp/FranceLfsUrate", replace
}

** LFS: dept
{
use unemp UE EU NE NU UN EN labforce dep wt using "EEC_03", clear
destring dep, replace force
drop if dep == .
rename dep City
gen EE = . // for compatibility with code below
rename unemp U
gen pop = 1

fcollapse (mean) U labforce UE EU EN NE NU UN EE (sum) pop , by( City )

keep if pop >= 1000

* Winsor correction when rate is 0
foreach var in U labforce UE EU EN NE NU UN EE {
gen winsor`var' = 0
qui su `var' if `var' > 0 [ w = pop ] , d
replace winsor`var' = `var' < r(p1)
replace `var' = r(p1) if winsor`var' == 1
}
gen winsor = winsorU + winsorEU + winsorUE

foreach var in EU UE U EE {
gen l`var' = log(`var')
}
gen mlUE = - lUE

save "temp/FranceLfsDepUrate", replace
}

** DADS
{
use U UE EU NE NU UN EN EE labforce ze year using "dads_6", clear
keep if year >= 1996 & year <= 2007
rename ze City
drop if City == .

* Weight observations: number of available obs varies by variable (U, UE, EU); affects the denominator of the weights
gen wt = 1
gen pop = 1

local varliste U labforce UE EU EN NE NU UN EE

fcollapse (mean) `varliste' (sum) pop , by( City )

* Winsor correction when rate is 0
foreach var in `varliste' {
gen winsor`var' = 0
qui su `var' if `var' > 0 [ w = pop ] , d
replace winsor`var' = `var' < r(p1)
replace `var' = r(p1) if winsor`var' == 1
}
gen winsor = winsorU + winsorEU + winsorUE

foreach var in EU UE U EE {
gen l`var' = log(`var')
}
gen mlUE = - lUE

save "temp/FranceUrate", replace
}

}

* CALCULATIONS: VARIANCE DECOMPOSITIONS of U vs. EU/UE
{

foreach sample in France FranceLfs FranceLfsDep US {

local sample France

use "temp/`sample'Urate", clear
gen lUc = log(U/(1-U))
gen lUcPredict = lEU - lUE

gen sample = ( pop >= 1000 ) & ( winsor == 0 )
keep if sample == 1

corr lEU lUE [ w = pop ]

* Compute time-aggregated flows
gen FpS = - log( 1 - UE - EU )
gen S   = FpS * EU / ( 1 - exp( - FpS ) )
gen F   = FpS * UE / ( 1 - exp( - FpS ) )

gen lS = log(S)
gen mlF = - log(F)

* Variance decomposition w/o participation
corr lEU mlUE [ w = pop ] , cov
mat CovNTA = r(C)

corr lS mlF [ w = pop ] , cov
mat CovTA = r(C)

* Variance decomposition w/ participation
gen lpred = log( EU / UE )
gen lcorr = log( 1 + ( NU * EN * UE - UN * EU * NE ) / ( EU * UE * ( NE + NU ) ) )

gen lpredParticip = log( ( EU * ( NE + NU ) + NU * EN ) / ( NE * ( UN + UE ) + NU * UE ) )

gen lcorrExact = lpredParticip - lpred

corr lUc lpred lcorrExact [ w = pop ] , cov
mat CovWeightExact = r(C)

* Variance decomposition w/ participation and EU UE breakdown
corr lUc lEU mlUE lcorrExact [ w = pop ] , cov
mat CovWeightExactBreak = r(C)

* save dataset for scatterplots below
save "temp/`sample'UrateDecomps", replace


clear

foreach type in NTA TA {

svmat Cov`type' , names("var`type'")
set obs 3

replace var`type'1 = 2 * var`type'1[2] if _n == 3
replace var`type'1 = var`type'2[2] if _n == 2
rename var`type'1 var`type'

egen tot`type' = sum(var`type')
replace var`type' = 100 * var`type' / tot`type'
drop tot`type'

}

drop *2

gen des = "Separation" 		if _n == 1
replace des = "Finding" 	if _n == 2
replace des = "Covariance" 	if _n == 3
gen n = _n
order des var*

save "temp/`sample'UrateEUUE", replace


* Re-arrange decompositions w/o breakdown
clear
set obs 4

gen des = "Total" if _n == 1
replace des = "Job losing and finding" if _n == 2
replace des = "Non-participation" if _n == 3
replace des = "Residual" if _n == 4

foreach w in WeightExact {

svmat Cov`w' , names("Cov`w'")
drop Cov`w'2 Cov`w'3
rename Cov`w' Cov`w' 
replace Cov`w'  = Cov`w'[1] - Cov`w'[2] - Cov`w'[3] if _n == 4

gen Cov`w'Percent = 100 * Cov`w' / Cov`w'[1]

}

gen n = _n
order des Cov*
save "temp/`sample'UrateParticip", replace


* Re-arrange decompositions w/ breakdown
clear
set obs 5

gen des = "Total" if _n == 1
replace des = "Job losing" if _n == 2
replace des = "Job finding" if _n == 3
replace des = "Non-participation" if _n == 4
replace des = "Residual" if _n == 5

foreach w in WeightExactBreak {

svmat Cov`w' , names("Cov`w'")
drop Cov`w'2 Cov`w'3 Cov`w'4
rename Cov`w' Cov`w' 
replace Cov`w'  = Cov`w'[1] - Cov`w'[2] - Cov`w'[3] - Cov`w'[4] if _n == 5

gen Cov`w'Percent = 100 * Cov`w' / Cov`w'[1]

}

gen n = _n
order des Cov*
save "temp/`sample'UrateParticipBreak", replace



}

}
					
* TABLE: VARIANCE DECOMPOSITIONS OF RAW DATA --->>> NEED TO RUN A MATLAB FILE TO CONSTRUCT THE TABLE
{

use "temp/USUrateEUUE", clear
rename varNTA varNTAUS
rename varTA varTAUS
merge 1:1 des using "temp/FranceUrateEUUE", nogen
rename varNTA varNTAFrance
rename varTA varTAFrance
merge 1:1 des using "temp/FranceLfsUrateEUUE", nogen
rename varNTA varNTAFranceLfs
rename varTA varTAFranceLfs
merge 1:1 des using "temp/FranceLfsDepUrateEUUE", nogen
rename varNTA varNTAFranceLfsDep
rename varTA varTAFranceLfsDep

order des n
sort n
drop n

export delimited using "VarDecompRawNoParticip.csv", replace



use "temp/USUrateParticip", clear
drop *Exact
drop if des == "Total"
rename Cov varUS
merge 1:1 des using "temp/FranceUrateParticip", nogen
drop *Exact
drop if des == "Total"
rename Cov varFrance
merge 1:1 des using "temp/FranceLfsUrateParticip", nogen
drop *Exact
drop if des == "Total"
rename Cov varFranceLfs
merge 1:1 des using "temp/FranceLfsDepUrateParticip", nogen
drop *Exact
drop if des == "Total"
rename Cov varDep

order des n
sort n
drop n

export delimited using "VarDecompRawParticip.csv", replace


}

* SCATTERPLOTS OF U vs. EU/UE IN RAW DATA
{

* Prepare data for both countries
foreach sample in US France {
use "temp/`sample'UrateDecomps", clear

* De-mean
foreach var in lUc lUcPredict lEU lUE mlUE {
qui su `var' [ w = pop ]
replace `var' = `var' - r(mean)
}

qui reg lUc lUcPredict [ w = pop ]
local R2free 	= floor( 100 * e(r2) )
local slope 	= floor( 100 *  _b[ lUcPredict ] ) / 100
local cons		= floor( 100 *  _b[_cons] ) / 100
local se 		= floor( 100 *  _se[ lUcPredict ] ) / 100

qui reg lUc lUcPredict [ w = pop ], noconstant
local R2cons 	= floor( 100 * e(r2) )

* Trim extremes for figure and regression
qui su lUcPredict, d
replace lUcPredict = . if lUcPredict < r(p1)
replace lUcPredict = . if lUcPredict > r(p99)

gen predict = floor( lUcPredict * 10 ) / 10
sort predict
bys predict: replace predict = . if _n > 1
foreach var in predict lUcPredict {
egen max`var' = max(`var')
egen min`var' = min(`var')
}
replace predict = 1 * maxlUcPredict if predict == maxpredict
replace predict = 1 * minlUcPredict if predict == minpredict
gen fit = `cons' + `slope' * predict

drop if abs(lEU) > 0.75 | abs(lUc) > 0.75 | abs(mlUE) > 0.75

* Aggregate
global Nb = 20 

sort lUc
gen cumpop = sum(pop)
gen rank = cumpop / cumpop[_N]

gen Q = .
forvalues i=1(1)$Nb {
replace Q = `i' if rank > ( `i' - 1 ) / $Nb
}
egen SumPop = sum(pop), by(Q)

foreach var in lUc lEU mlUE {
egen QMean_`var' = sum( pop * `var' ) , by(Q)
replace QMean_`var' = QMean_`var' / SumPop
}

save "temp/`sample'Scatter", replace

}

* Produce scatterplots
{
use "temp/FranceScatter", clear
gen US = 0
append using "temp/USScatter"
replace US = 1 if US == .


* Binned
local name FranceUS_EU053121
scatter QMean_lEU QMean_lUc if US == 1 , mcolor("$MyLightGreen") msize(3)  || /// 
scatter lEU lUc [ w = pop ] if US == 0 , msymbol(circle_hollow) mlcolor("$MyBlue") msize(1.25) ///
	|| function y = x , range(-0.75 0.75) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.75 0.75) titlegap(3) ) yscale(range(-0.75 -0.75) titlegap(3) ) ///
	xlabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	ylabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("log job losing rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(2 1) ///
		  label(2 "France") ///
		  label(1 "United States") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace


local name FranceUS_UE053121
scatter QMean_mlUE QMean_lUc if US == 1 , mcolor("$MyLightGreen") msize(3)  || /// 
scatter mlUE lUc [ w = pop ] if US == 0 , msymbol(circle_hollow) mlcolor("$MyBlue") msize(1.25) /// 
	|| function y = x , range(-0.75 0.75) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.75 0.75) titlegap(3) ) yscale(range(-0.75 -0.75) titlegap(3) ) ///
	xlabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	ylabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("- log job finding rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(2 1) ///
		  label(2 "France") ///
		  label(1 "United States") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace



* EE rate
qui su lEE [w=pop]
replace lEE = lEE - r(mean)

local name FranceEE081721
scatter lUE lUc [ w = pop ] if US == 0 , msymbol(circle_hollow) mlcolor("$MyBlue") msize(1.25) || ///
scatter lEE lUc [ w = pop ] if US == 0 , msymbol(square_hollow) mlcolor("$MyGreen") msize(1.25) ///
	|| function y = x , range(-0.75 0.75) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.75 0.75) titlegap(3) ) yscale(range(-0.75 -0.75) titlegap(3) ) ///
	xlabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	ylabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("log job finding and JtJ rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(1 2) ///
		  label(1 "Job finding") ///
		  label(2 "Job-to-job mobility rate") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace

reg lUE lUc [w=pop] if US == 0 // - 0.19
reg lEE lUc [w=pop] if US == 0 // + 0.46
reg lEU lUc [w=pop] if US == 0 // + 0.72

reg lEE lUc [w=pop] if City != 1101 & US == 0 // + 0.26



}

}

}

*** Variance decompositions of local EU onto Worker and Firm effects
{

* FIXED EFFECTS DECOMPOSITIONS
{

* Aggregate by group
global OptionDropStayers = 1
foreach var in EU { //  UE lqgwc

	global var `var'

	foreach group in siren siret sirencs2 siretcs2 sirencs3 siretcs3 sirencs4 siretcs4 { 

		use $var gid ze siren siret cs2 cs3 cs4 year using "dads_6" if year >= 1997 & $var != . , clear

		drop year
		rename gid Worker
		rename ze City
		
		forvalues i=2(1)4{
			if "`group'" == "sirencs`i'" {
				drop if cs`i' == .
				gegen `group' = group( siren cs`i' )
			}
			if "`group'" == "siretcs`i'"  {
				drop if cs`i' == .
				gegen `group' = group( siret cs`i' )
			}
		}
		
		drop if `group' == . | City == . | Worker == .

		gen q$var = 1

		gcollapse (sum) $var q$var , by( Worker City `group' )
		
		replace $var = $var / q$var
		
		* keep only "movers"
		if $OptionDropStayers == 1 {
			gegen Num`group' = nvals(`group'), by(Worker)
			gegen NumWorker  = nvals(Worker), by(`group')
			gegen NumCity    = nvals(City), by(Worker)
			
			drop if Num`group' <= 2 | NumWorker <= 2 | NumCity <= 2
			drop Num*
		}

		save "temp/Worker`var'`group'_01", replace

	}
}

* Construct quantiles
foreach nq in 10 20 50 100 300 {

	global NQ = `nq'
	
	foreach outcome in EU { // UE lqgwc
	
		global Y `outcome'
		
		foreach group in siren siret sirencs2 siretcs2 sirencs3 siretcs3 sirencs4 siretcs4 {  
		
			foreach var in Worker City `group' {
				
				di "${Y}"
				di "`group'"
				
				use "temp/Worker${Y}`group'_01", clear
				
				replace $Y = $Y * q$Y
				gcollapse (sum) $Y q$Y , by(`var')
				replace $Y = $Y / q$Y
				
				sort $Y
				
				gen rank = sum( q$Y )
				gen select = 1 if $Y > 0
				egen minrank = min( rank * select )
				replace rank = ( rank - minrank + 1 ) /( rank[_N]- minrank + 1 )
				replace rank = . if $Y == 0
				
				gen ${Y}Q`var' = .
				
				* Special bin for all those that have $Y = 0
				
				if ${Y}[1] > 0 {
					forvalues i=1(1)$NQ {
						replace ${Y}Q`var' = `i' if rank > (`i'-1)/$NQ & rank <= `i'/$NQ & $Y > 0
					}
				}
				
				if ${Y}[1] == 0 {
					replace ${Y}Q`var' = 1 if $Y == 0
				
					local NQm = $NQ - 1
					forvalues i=1(1)`NQm' {
						replace ${Y}Q`var' = `i'+ 1 if rank > (`i'-1)/`NQm' & rank <= `i'/`NQm' & $Y > 0
					}
				}
					
				keep `var' ${Y}Q`var'
						
				save "temp/Worker`group'_NQ${NQ}_${Y}Q`var'", replace
				
			}
			
			di "merge"
			use "temp/Worker${Y}`group'_01", clear
			
			foreach var in Worker City `group' {

				merge m:1 `var' using "temp/Worker`group'_NQ${NQ}_${Y}Q`var'", nogen keep(3)
			
			}
			
			save "temp/Worker`group'_NQ${NQ}_${Y}_02", replace
				
		}
	}
	
}

* Fixed effects regression
foreach nq in 10 20 50 100 300 {

	global NQ = `nq'
		
	foreach outcome in EU { 
		
		global Y `outcome'		
		
		* SIRET: NO CITY FE
		foreach group in siret siretcs2 siretcs3 siretcs4 { 

			use "temp/Worker`group'_NQ${NQ}_${Y}_02", clear
			replace $Y = 100 * $Y

			su $Y [w=q$Y]
			global Mean$Y = r(mean)

			reghdfe $Y [w=q$Y], a(`group'FE = ${Y}Q`group' WorkerFE = ${Y}QWorker)

			* Fill in missing FEs
			foreach var in `group' Worker {
				egen `var'FEfill = min(`var'FE), by(`var')
			}
				
			replace WorkerFEfill =  $Y - ${mean$Y} - `group'FEfill ///
				if WorkerFEfill == . & `group'FEfill != .
			replace `group'FEfill =  $Y - ${mean$Y} - WorkerFEfill ///
				if `group'FEfill == .  & WorkerFEfill != .
				
			local varlistfill $Y `group'FE WorkerFE `group'FEfill WorkerFEfill

			di "*********"
			di ""
			di "NQ = `nq'; dep. var. = `outcome'; group = `group'"
			di ""
			di "Unconditional summarize"	
			corr $Y `group'FE WorkerFE [w=q$Y], cov

			
			foreach var in `varlistfill' {
				replace `var' = `var' * q$Y
			}
			
			gcollapse (sum) `varlistfill' q$Y , by(${Y}QCity)
			
			foreach var in `varlistfill' {
				replace `var' = `var' / q$Y
			}
			
			di "Summarize after collapsed by city"
			corr $Y `group'FE WorkerFE [w=q$Y], cov
			corr $Y `group'FEfill WorkerFEfill [w=q$Y], cov
			
			save "temp/Worker`group'_NQ${NQ}_${Y}_03", replace

		}
		
		* SIREN: With CITY FE
		foreach group in siren sirencs2 sirencs3 sirencs4 { 

			use "temp/Worker`group'_NQ${NQ}_${Y}_02", clear
			replace $Y = 100 * $Y

			su $Y [w=q$Y]
			global Mean$Y = r(mean)

			reghdfe $Y [w=q$Y], a(CityFE = ${Y}QCity `group'FE = ${Y}Q`group' WorkerFE = ${Y}QWorker)

			* Fill in missing FEs
			foreach var in City `group' Worker {
				egen `var'FEfill = min(`var'FE), by(`var')
			}
				
			replace CityFEfill =  $Y - ${mean$Y} - WorkerFEfill - `group'FEfill ///
				if CityFEfill == . & WorkerFEfill != . & `group'FEfill != .
			replace WorkerFEfill =  $Y - ${mean$Y} - CityFEfill - `group'FEfill ///
				if WorkerFEfill == . & CityFEfill != . & `group'FEfill != .
			replace `group'FEfill =  $Y - ${mean$Y} - CityFEfill - WorkerFEfill ///
				if `group'FEfill == . & CityFEfill != . & WorkerFEfill != .
				
			local varlistfill $Y CityFE `group'FE WorkerFE CityFEfill `group'FEfill WorkerFEfill

			di "*********"
			di ""
			di "NQ = `nq'; dep. var. = `outcome'; group = `group'"
			di ""
			di "Unconditional summarize"	
			corr $Y CityFE `group'FE WorkerFE [w=q$Y], cov

			
			foreach var in `varlistfill' {
				replace `var' = `var' * q$Y
			}
			
			gcollapse (sum) `varlistfill' q$Y , by(${Y}QCity)
			
			foreach var in `varlistfill' {
				replace `var' = `var' / q$Y
			}
			
			di "Summarize after collapsed by city"
			corr $Y CityFE `group'FE WorkerFE [w=q$Y], cov
			corr $Y CityFEfill `group'FEfill WorkerFEfill [w=q$Y], cov
			
			save "temp/Worker`group'_NQ${NQ}_${Y}_03", replace

		}
		
		
	}
}

}

* GRAPHS
{

	* SIRET: NO CITY FE

*global NQ = 20
global NQ = 10 
global i = 4 // 4

use temp/Workersiretcs${i}_NQ${NQ}_EU_03, replace

su EU // [w=qEU]
gen EUdemean = EU - r(mean)

sort EUdemean

corr EUdemean siretcs${i}FE WorkerFE [w=qEU], cov
mat Cov = r(C)
svmat Cov , names("Cov")
drop Cov2 Cov3

gen WorkerPlusSiretcs${i}FE = siretcs${i}FE + WorkerFE

* normalize to ranks
gen rankCity = ( EUQCity - 1 ) / ( $NQ - 1 )
gen WorkerFErenorm 	    = WorkerFE - WorkerFE[1]
gen siretcs${i}FErenorm = siretcs${i}FE - siretcs${i}FE[1]

gen WorkerPlusSiretcs${i}FErenorm = WorkerFErenorm + siretcs${i}FErenorm

gen WorkerShare 		=  WorkerFErenorm / WorkerPlusSiretcs${i}FErenorm * rankCity
gen siretcs${i}Share    =  siretcs${i}FErenorm / WorkerPlusSiretcs${i}FErenorm * rankCity

replace siretcs${i}Share = 0 if siretcs${i}Share == .
replace WorkerShare      = 0 if WorkerShare == .

* standardize
qui su WorkerPlusSiretcs${i}FE, d
gen WorkerPlusSiretcs${i}FE_sd = ( 2 * WorkerPlusSiretcs${i}FE - ( r(max) + r(min) ) ) / ( r(max) - r(min) )
gen WorkerFE_sd = WorkerFE / WorkerPlusSiretcs${i}FE * WorkerPlusSiretcs${i}FE_sd
gen Siretcs${i}FE_sd = siretcs${i}FE / WorkerPlusSiretcs${i}FE * WorkerPlusSiretcs${i}FE_sd

sort WorkerPlusSiretcs${i}FE_sd

* Graph normalized at the left
local name WorkerVsSiretcs${i}NQ${NQ}_Left091321

local ShareWorker = floor( 100 * Cov1[3] / ( Cov1[2] + Cov1[3] ) )
local ShareJob    = 100 - `ShareWorker' // floor( 100 * Cov1[2] / Cov1[1] )

twoway area rankCity siretcs${i}Share rankCity, ///
	color("$MyBlue" "$MyOrange") lwidth(0 0) || ///
	scatter rankCity rankCity , ///
   	connect(l) msymbol(O) mlcolor("$MyBlue") mcolor("$MyBlue") msize(2) ///
   	clwidth(1) lcolor("$MyBlue") clpattern(solid) || ///
	scatter siretcs${i}Share rankCity , ///
   	connect(l) msymbol(O) mcolor("$MyOrange") mlcolor("$MyOrange") msize(2) ///
   	clwidth(1) lcolor("$MyOrange")  clpattern(solid) ///
	, xsize(6) ysize(7) ///
	xscale(range(0 1) titlegap(3) ) yscale(range(0 1) titlegap(3) ) ///
	xlabel(0(0.25) 1 , labsize(small) nogrid) ///
	ylabel(0(0.25) 1 , labsize(small) nogrid) ///
	xtitle("Standardized city job losing rate" , size(small) ) ///
	ytitle("Contributions of employer and worker effects" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(2 1) cols(2) symysize(5) ///
		  label(1 "Workers (`ShareWorker'%)") ///
		  label(2 "Employers (`ShareJob'%)") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace




	* SIREN: CITY FE
	
global NQ = 10 
global i = 4 // 4

if $i == 0 {
	use temp/Workersiren_NQ${NQ}_EU_03, replace
	gen sirencs${i}FE = sirenFE
}
if $i > 0 {
	use temp/Workersirencs${i}_NQ${NQ}_EU_03, replace
}

su EU // [w=qEU]
gen EUdemean = EU - r(mean)

sort EUdemean

corr EUdemean sirencs${i}FE WorkerFE CityFE [w=qEU], cov
mat Cov = r(C)
svmat Cov , names("Cov")
drop Cov2 Cov3

gen SumFE = sirencs${i}FE + WorkerFE + CityFE

* normalize to ranks
gen rankCity = ( EUQCity - 1 ) / ( $NQ - 1 )


gen WorkerFErenorm 	    = WorkerFE - WorkerFE[1]
gen sirencs${i}FErenorm = sirencs${i}FE - sirencs${i}FE[1]
gen CityFErenorm        = CityFE - CityFE[1]

gen SumFErenorm = WorkerFErenorm + sirencs${i}FErenorm + CityFErenorm

gen WorkerShare 		=  WorkerFErenorm / SumFErenorm * rankCity
gen sirencs${i}Share    =  sirencs${i}FErenorm / SumFErenorm * rankCity
gen CityShare           =  CityFErenorm / SumFErenorm * rankCity

replace sirencs${i}Share = 0 if sirencs${i}Share == .
replace WorkerShare      = 0 if WorkerShare == .
replace CityShare        = 0 if CityShare == .

	* for graph only
gen sirencs${i}PlusCityShare = sirencs${i}Share + CityShare
sort SumFErenorm

* Graph normalized at the left
local name WorkerVsSirencs${i}NQ${NQ}_Left072922

local ShareWorker = floor( 100 * Cov1[3] / ( Cov1[2] + Cov1[3] + Cov1[4] ) )
local ShareCity   = floor( 100 * Cov1[4] / ( Cov1[2] + Cov1[3]+ Cov1[4] ) )
local ShareJob    = 100 - `ShareWorker' - `ShareCity' // floor( 100 * Cov1[2] / Cov1[1] )

twoway area rankCity rankCity, ///
	color("$MyBlue") lwidth(0) || ///
area sirencs${i}PlusCityShare rankCity , ///
	color("$MyGreen") lwidth(0) || ///	
area sirencs${i}Share rankCity , ///
	color("$MyOrange") lwidth(0) || ///	
scatter rankCity rankCity , ///
   	connect(l) msymbol(O) mlcolor("$MyBlue") mcolor("$MyBlue") msize(2) ///
   	clwidth(1) lcolor("$MyBlue") clpattern(solid) || ///
scatter sirencs${i}PlusCityShare rankCity , ///
   	connect(l) msymbol(O) mlcolor("$MyGreen") mcolor("$MyGreen") msize(2) ///
   	clwidth(1) lcolor("$MyGreen") clpattern(solid) || ///
scatter sirencs${i}Share rankCity , ///
   	connect(l) msymbol(O) mcolor("$MyOrange") mlcolor("$MyOrange") msize(2) ///
   	clwidth(1) lcolor("$MyOrange")  clpattern(solid) ///
	, xsize(6) ysize(7) ///
	xscale(range(0 1) titlegap(4) ) yscale(range(0 1) titlegap(4) ) ///
	xlabel(0(0.25) 1 , labsize(small) nogrid) ///
	ylabel(0(0.25) 1 , labsize(small) nogrid) ///
	xtitle("Standardized city job losing rate" , size(small) ) ///
	ytitle("Contributions of employer, city and worker effects" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(3 2 1) cols(3) symysize(5) ///
		  label(1 "Worker (`ShareWorker'%)") ///
		  label(2 "City (`ShareCity'%)") ///
		  label(3 "Employer (`ShareJob'%)") ///
		  region(lcolor(white)) size(vsmall) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace

}

* TABLE with several specifications
{

foreach nq in 10 20 50 100 300 {

	global NQ = `nq'
		
	* SIRET: NO CITY FE
	foreach group in siret siretcs2 siretcs3 siretcs4 { 

		use "temp/Worker`group'_NQ${NQ}_EU_03", clear
		
		corr EU `group'FE WorkerFE [w=qEU], cov
		mat Cov = r(C)
		svmat Cov , names("Cov")
		drop Cov2 Cov3 
		keep Cov
		
		rename Cov `group'_$NQ
		keep if _n <= 4
		replace `group' = 0 if _n == 4
		
		gen des     = "Var EU" 		  if _n == 1
		replace des = "Cov EU-Firm"   if _n == 2
		replace des = "Cov EU-Worker" if _n == 3
		replace des = "Cov EU-City" if _n == 4
			
		* put to %
		local total = `group'[2] + `group'[3]
		replace `group' = 100 * `group' / `total'
		
		replace `group' = 100 if _n == 1
		
		save "temp/VarDecomp`group'`nq'", replace
		
	}
	
	* SIREN: CITY FE
	foreach group in siren sirencs2 sirencs3 sirencs4 { 

		use "temp/Worker`group'_NQ${NQ}_EU_03", clear
		
		corr EU `group'FE WorkerFE CityFE [w=qEU], cov
		mat Cov = r(C)
		svmat Cov , names("Cov")
		keep Cov1
		
		rename Cov `group'_$NQ
		keep if _n <= 4
		
		gen des     = "Var EU" 		  if _n == 1
		replace des = "Cov EU-Firm"   if _n == 2
		replace des = "Cov EU-Worker" if _n == 3
		replace des = "Cov EU-City" if _n == 4
		
		* put to %
		local total = `group'[1]
		replace `group' = 100 * `group' / `total'
		
		save "temp/VarDecomp`group'`nq'", replace
		
	}
	
}

clear
set obs 4
gen des     = "Var EU" 		  if _n == 1
replace des = "Cov EU-Firm"   if _n == 2
replace des = "Cov EU-Worker" if _n == 3
replace des = "Cov EU-City" if _n == 4

foreach nq in 10 20 50 100 300 {
	foreach group in siret siretcs2 siretcs3 siretcs4 siren sirencs2 sirencs3 sirencs4 {
	merge 1:1 des using "temp/VarDecomp`group'`nq'", nogen keep(3)
	}
}

export delimited using "VarDecompWorkerFirm.csv", replace


}
		
* Moves to and from city
{
	
use EU gid  year q ze siren E* U* N* Mig* using "dads_6" if year >= 1997 & EU != . , clear	

gegen CityEU = mean(EU), by(ze)
gegen FirmEU = mean(siren), by(ze)

rename gid Worker
rename ze City

* merge with movers
merge m:1 Worker using "temp/Workersiren_NQ10_EUQWorker", nogen keep(3)
merge m:1 siren using "temp/Workersiren_NQ10_EUQsiren", nogen keep(3)
merge m:1 City using "temp/Workersiren_NQ10_EUQCity", nogen keep(3)

rename siren Firm

gsort Worker year q
order Worker year q City Firm Mig*

foreach var in Worker Firm City {
	gegen EU`var' = mean(EU), by(`var')
}

save temp/dads_6_movers_01, replace


* event study design
use temp/dads_6_movers_01, clear

gsort Worker year q
bys Worker: gen SumMig = sum(Mig)
gegen MaxMig = max(SumMig), by(Worker)

rename EUQsiren EUQFirm

* start with individuals who move only once
keep if MaxMig == 1
gcollapse (mean) EU* CityEU , by(Worker SumMig)

gsort Worker SumMig
bys Worker: keep if _N == 2
foreach var in EU CityEU EUFirm EUCity EUQCity EUQFirm {
	bys Worker: gen d`var' = `var' - `var'[_n-1]
}

reg dEU dCityEU // 1.6
estimates store dEU1

reg dEU dCityEU dEUFirm // -0.06
estimates store dEU2

reg dEU dCityEU dEUFirm if dEUQCity <= 0 // 0.13
estimates store dEU3

reg dEU dCityEU dEUFirm if dEUQCity >= 0 // -0.45
estimates store dEU4


reg dEU dEUCity // 1.2
estimates store dEU11

reg dEU dEUCity dEUFirm // 0.41
estimates store dEU12

reg dEU dEUCity dEUFirm if dEUQCity <= 0 // 0.32
estimates store dEU13

reg dEU dEUCity dEUFirm if dEUQCity >= 0 // 0.68
estimates store dEU14

reg dEU dEUCity dEUFirm if dEUQFirm <= 0
estimates store dEU15

reg dEU dEUCity dEUFirm if dEUQFirm >= 0
estimates store dEU16


* Export tables				 
esttab dEU1 dEU2 dEU3 dEU4 dEU11 dEU12 dEU13 dEU14 dEU15 dEU16 using "${tables}/dEU073122.tex", replace ///
	booktabs b(3) se(3) alignment(l l) nocons ///
	mgroups("No split" "Split by city" "No split" "Split by city" "Split by firm", pattern(1 0 1 0 1 0 1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	mtitles("All" "All" "Down" "Up" "All" "All" "Down" "Up" "Down" "Up") ///
	title(Changes in individual EU probability upon moving.) ///
	coeflabel(dCityEU "$\D$ city EU prob. (all)" ///
			  dEUCity "$\D$ city EU prob. (movers)" ///
			  dEUFirm "$\D$ firm EU prob. (movers)") ///
	order(dCityEU dEUCity dEUFirm)	///	  
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 , fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis. $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$." ///
			"\tiny First four columns: city-level EU probability computed using all individuals. Last four columns: city-level EU probability computed using only movers." ///	
			 "\tiny All: linear regression using all movers in sample that move exactly once. Down: restricting to individuals who move to cities with weakly lower EU probability." ///
			 "\tiny Up: restricting to individuals who move to cities with weakly higher EU probability.")


}

}

*** Variance decompositions of local EU onto skills and industry
{
	
** FE regressionns by industry
{ 

* Aggregate by group
global OptionDropStayers = 0

foreach var in EU UE U {
	use `var' gid ze gapet2 cs2 year using "dads_6" if year >= 1997 & `var' != . , clear

	drop year
	rename gid Worker
	rename ze City
	rename gapet2 Ind
	rename cs2 Occ

	drop if Ind == . | City == . | Worker == . | Occ == .

	gen q`var' = 1

	gcollapse (sum) `var' q`var' , by( City Occ Ind )

	replace `var' = `var' / q`var'

	* keep only "movers"
	if $OptionDropStayers == 1 {
		gegen NumInd  = nvals(Ind), by(Occ City)
		gegen NumOcc  = nvals(Occ), by(Ind City)
		gegen NumCity = nvals(City), by(Occ Ind)
		
		drop if NumInd <= 2 | NumOcc <= 2 | NumCity <= 2
		drop Num*
	}

	save "temp/Group`var'_01", replace

}

* run FE regression at group level
foreach var in EU UE U {

	use "temp/Group`var'_01", clear

	reghdfe `var' [w=q`var'], a(IndFE = Ind OccFE = Occ CityFE = City)
	local varliste `var' IndFE OccFE CityFE

	foreach vari in `varliste' {
		replace `vari' = `vari' * q`var'
	}
				
	gcollapse (sum) `varliste' q`var' , by(City)
				
	foreach vari in `varliste' {
		replace `vari' = `vari' / q`var'
	}
	
	save "temp/Group`var'_02", replace
	
}
	
* merge
foreach var in EU UE U {
	
	use "temp/Group`var'_02", clear
	gcollapse (mean) CityFE (sum) q`var', by(City)
	rename CityFE `var'CityFE
	save "temp/Group`var'_03", replace
	
}

use "temp/GroupEU_03", clear
foreach var in U UE {
	merge 1:1 City using "temp/Group`var'_03", nogen keep(3)
}
keep if qEU >= 1000	
save "temp/GroupUEUUE_04", replace

* produce scatterplot
use "temp/FranceScatter", clear
merge 1:1 City using "temp/GroupUEUUE_04", nogen keep(3)

foreach var in EU UE U {
	qui su `var' [w=pop]
	gen Pred`var' = r(mean) + `var'CityFE
}
gen lPredEU  = log(PredEU)
gen lPredUE  = log(PredUE)
gen mlPredUE = - lPredUE
gen lPredUc = log(PredU/(1-PredU))


foreach var in lPredEU mlPredUE lPredUc {
	qui su `var' [w=pop]
	replace `var' = `var' - r(mean)
}


local name FranceGroup_EU053121
scatter lEU     lUc     [ w = pop ] , msymbol(circle_hollow) mlcolor("$MyBlue") msize(1.25) ||  /// 
scatter lPredEU lPredUc [ w = pop ] , msymbol(square_hollow) mcolor("$MyGreen") msize(1.25) || /// 
	|| function y = x , range(-0.75 0.75) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.75 0.75) titlegap(3) ) yscale(range(-0.75 -0.75) titlegap(3) ) ///
	xlabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	ylabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("log job losing rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(1 2) ///
		  label(1 "Unconditional") ///
		  label(2 "City residuals") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace


local name FranceGroup_UE053121
scatter mlUE     lUc     [ w = pop ] , msymbol(circle_hollow) mlcolor("$MyBlue") msize(1.25) ||  /// 
scatter mlPredUE lPredUc [ w = pop ] , msymbol(square_hollow) mcolor("$MyGreen") msize(1.25) || /// 
	|| function y = x , range(-0.75 0.75) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-0.75 0.75) titlegap(3) ) yscale(range(-0.75 -0.75) titlegap(3) ) ///
	xlabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	ylabel(-0.75(0.25)0.75 , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("log job finding rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(1 2) ///
		  label(1 "Unconditional") ///
		  label(2 "City residuals") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace

}

* Scatterplots by industry
{

* import sector names
{	
import excel "${root2}/doc/naf2003_liste_n3.xlsx", sheet("naf2003_liste_n3") clear

rename A apet2
rename B name
drop if _n <= 2

gen apet2n = apet2
destring apet2n, replace force

gen sector = ""
replace sector = "agriculture"    if apet2n <= 5
replace sector = "mining"         if apet2n >= 10 & apet2n <= 14
replace sector = "manufacturing"  if apet2n >= 15 & apet2n <= 37
replace sector = "utilities"      if ( apet2n >= 40 & apet2n <= 41 ) | apet2n == 90
replace sector = "construction"   if apet2n == 45
replace sector = "retail"         if apet2n >= 50 & apet2n <= 52
replace sector = "hospitality"    if apet2n == 55
replace sector = "transportation" if apet2n >= 60 & apet2n <= 64
replace sector = "FIRE" 	      if apet2n >= 65 & apet2n <= 71
replace sector = "other services" if apet2n >= 72
	* includes 
replace sector = "personal services" if inlist(apet2n,93,95)
replace sector = "public, education, health" if apet2n == 75 & apet2n <= 85

gen tradable = 1       if inlist(sector,"agriculture", ///
								        "mining", ///
								        "manufacturing")
replace tradable = 0   if inlist(sector,"construction", ///
								        "retail", ///
								        "hospitality", ///
									    "personal services")
replace tradable = 2 if inlist(sector,"utilities", ///
								        "FIRE", ///
								        "other services", ///
									    "public, education, health", ///
										"transportation")
	* 2 means in between tradable and non-tradable									
save temp/ListeNaf2003Names, replace
}

* compute city bins based on overall U-rate
{
	
use "temp/FranceUrate", clear
xtile CityBin = U [fw=pop], nq(10)

gegen UBin = sum( pop * U ), by(CityBin)
gegen SumPopBin = sum( pop ), by(CityBin)
replace UBin = UBin / SumPopBin

keep City CityBin UBin

save temp/CityBinAll, replace

}

* aggregate by city bin and tradable sector
{
		
use gid year q U E UE EU NE NU UN EN EE labforce ze year apet2 using "dads_6", clear
keep if year >= 1996 & year <= 2007
rename ze City
drop if City == .
keep if labforce == 1

merge m:1 apet2 using temp/ListeNaf2003Names
drop if _m == 2
drop _m

* Weight observations
gen wt = 1
gen pop = 1
merge m:1 City using temp/CityBinAll, nogen keep(3)
gcollapse (mean) U UBin UE EU  (sum) pop E , by( CityBin tradable )

save "temp/FranceUrateAgSector", replace
}

* graphs by tradability
{
	
use temp/FranceUrateAgSector, clear

global axis = 0.75

* define log variables
foreach var in EU UE U UBin {
	gen l`var' = log(`var')
}
gen mlUE = - lUE

gen lUmU = log(U/(1-U))
gen lUmUBin = log(UBin/(1-UBin))

foreach var in lEU lUE mlUE lUmU {
qui su `var' [fw = pop]
replace  `var' = `var' - `r(mean)'
}

qui su lUmUBin [fw = pop]
replace lUmUBin = lUmUBin - `r(mean)'

gen sample = abs(lUmU) <= $axis
gen sampleBin = abs(lUmUBin) <= $axis

* define employment shares
egen SumE = sum(E), by(CityBin)
gen Eshare = E / SumE


* graphs: EU
local name FranceEUTradables
scatter lEU lUmUBin if sampleBin & tradable == 2 , msymbol(D) mcolor("$MyLightGreen") msize(2.5) || ///
scatter lEU lUmUBin if sampleBin & tradable == 1 , msymbol(C) mcolor("$MyBlue") msize(2.5) || ///
scatter lEU lUmUBin if sampleBin & tradable == 0 , msymbol(S) mcolor("$MyGreen") msize(2.5) || ///
function y = x , range(-$axis $axis) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-$axis $axis) titlegap(3) ) yscale(range(-$axis $axis) titlegap(3) ) ///
	xlabel(-${axis}(0.25)$axis , labsize(small) nogrid) ///
	ylabel(-${axis}(0.25)$axis , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("log job losing rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(2 3 1) cols(3) ///
		  label(2 "Tradables") ///
		  label(3 "Non-tradables") ///
		  label(1 "Intermediate tradability") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace


* graphs: UE
local name FranceUETradables
scatter mlUE lUmUBin if sampleBin & tradable == 2 , msymbol(D) mcolor("$MyLightGreen") msize(2.5) || ///
scatter mlUE lUmUBin if sampleBin & tradable == 1 , msymbol(C) mcolor("$MyBlue") msize(2.5) || ///
scatter mlUE lUmUBin if sampleBin & tradable == 0 , msymbol(S) mcolor("$MyGreen") msize(2.5) || ///
function y = x , range(-$axis $axis) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(12.5) ysize(14) ///
	xscale(range(-$axis $axis) titlegap(3) ) yscale(range(-$axis $axis) titlegap(3) ) ///
	xlabel(-${axis}(0.25)$axis , labsize(small) nogrid) ///
	ylabel(-${axis}(0.25)$axis , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("- log job finding rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(2 3 1) cols(3) ///
		  label(2 "Tradables") ///
		  label(3 "Non-tradables") ///
		  label(1 "Intermediate tradability") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace



* graphs: employment shares
local name FranceEmpShareTradables
scatter Eshare lUmUBin if sampleBin & tradable == 2 , msymbol(D) mcolor("$MyLightGreen") msize(2.5) || ///
scatter Eshare lUmUBin if sampleBin & tradable == 1 , msymbol(C) mcolor("$MyBlue") msize(2.5) || ///
scatter Eshare lUmUBin if sampleBin & tradable == 0 , msymbol(S) mcolor("$MyGreen") msize(2.5) ///
	xscale(range(-$axis $axis) titlegap(3) ) yscale(range(0.1 0.6) titlegap(3) ) ///
	, xsize(12.5) ysize(14) ///
	xlabel(-${axis}(0.25)$axis , labsize(small) nogrid) ///
	ylabel(0.1(0.1)0.6 , labsize(small) nogrid) ///
	xtitle("log unemployment / employment" , size(small) ) ///
	ytitle("employment share" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(2 3 1) cols(3) ///
		  label(2 "Tradables") ///
		  label(3 "Non-tradables") ///
		  label(1 "Intermediate tradability") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace



}

}

}

*** Vacancies and tightness
{

** Aggregate Acemo to ZE level
{

use "Acemo0307", clear

* adjust emp when 0
replace emp = 1 if emp == 0

gen VacRate = Vacancies / emp
su VacRate, d
su VacRate if VacRate > 0 , d
drop if Vacancies == .
drop if emp == .

replace Diff = Diff * emp
gen apet2 = substr(apet,1,2)

* Collapse by city and industry to take out industry effects
gcollapse (sum) Vacancies Diff emp, by(ze apet2)

	* In logs
gen lVacancies = log(Vacancies)
reghdfe lVacancies , a(lVacInd = apet2 ze)
gen VacanciesRes = exp( lVacancies - lVacInd )

	* In levels
reghdfe Vacancies , a(VacInd = apet2 ze)
gen VacanciesRes2 = Vacancies - VacInd 
	
fcollapse (sum) Vacancies VacanciesRes VacanciesRes2 Diff emp, by(ze)

gen VacPerEmp = Vacancies / emp
gen VacResPerEmp = VacanciesRes / emp
gen VacRes2PerEmp = VacanciesRes2 / emp

replace Diff = Diff / emp

save "Acemo0307Ze", replace

}

** Merge with U-rate and EE-rate data
{

* Prepare DADS data for 2003 to 2007, same years as acemo
{

use U UE EU NE NU UN EN EE labforce ze year using "dads_6", clear
keep if year >= 2003 & year <= 2007
rename ze City
drop if City == .

gen pop = 1
local varliste U labforce UE EU EN NE NU UN EE
fcollapse (mean) `varliste' (sum) pop , by( City )
save "temp/FranceUrateForVac", replace

}

* Estimate contact rate of employed with EE transitions (all or up) on same years as acemo
{

use EE E lq gid year using "dads_6" if year >= 2003 & E == 1, clear

global NQw = 100
sort lq
gen rank = _n
replace rank = rank / rank[_N]

gen wQ = .
forvalues i=1(1)$NQw {
	di `i'
	replace wQ = `i' if rank > (`i'-1)/$NQw & rank <= `i'/$NQw 
}

collapse (mean) EE lq (semean) se_EE = EE se_lq = lq, by( wQ )

save "temp/EEbyWage", replace



use EE E lq gid siren year q using "dads_6" if year >= 2003 & E == 1, clear
order gid year q siren lq

sort gid year q
bys gid: gen EEplus = ( lqgwc[_n+1] > lqgwc[_n] ) * EE if _n < _N
bys gid: replace EEplus = 0 if EE == 0 & _n == _N

global NQw = 100
sort lq
gen rank = _n
replace rank = rank / rank[_N]

gen wQ = .
forvalues i=1(1)$NQw {
	di `i'
	replace wQ = `i' if rank > (`i'-1)/$NQw & rank <= `i'/$NQw 
}

collapse (mean) EEplus lq (semean) se_EEplus = EEplus se_lq = lq, by( wQ )

save "temp/EEplusbyWage", replace



use "temp/EEbyWage", clear
merge 1:1 wQ using "temp/EEplusbyWage", nogen keep(3)

local p1 = floor( 1000 * EE[1] ) / 1000
local se_p1 = floor( 1000 * se_EE[1] ) / 1000

local p1p = floor( 1000 * EEplus[1] ) / 1000
local se_p1p = floor( 1000 * se_EEplus[1] ) / 1000

local name EEbyWage060121
scatter EE wQ    , msymbol(C) mlcolor("$MyBlue") mcolor("$MyBlue") ///
				connect(l) lcolor("$MyBlue") || ///
scatter EEplus wQ , msymbol(S) mlcolor("$MyOrange") mcolor("$MyOrange") ///
				connect(l) lcolor("$MyOrange") ///
	, xsize(14) ysize(14) ///
	xscale( titlegap(3)  ) yscale( titlegap(3)  ) ///
	ylabel( , nogrid) ///
	xtitle("Daily wage percentile"  ) ///
	ytitle("Job-to-job mobility probability" ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on cols(2) order(1 2) ///
		   label(1 "All JtJ transitions") ///
		   label(2 "Only w/ wage increase") ///					
		   region(lcolor(white)) size(small) ) ///
	name(`name', replace) ///
	note("JtJ at first percentile" ///
		 "All:           `p1' (`se_p1')" ///
		 "Increase: `p1p' (`se_p1p')")
graph export "${graphs}/`name'.pdf", name(`name') replace

global lambdaEtimesSurvival = EE[1] // precisely estimated, se = 0.0013, vs. point estimate ot 0.162

use "temp/FranceUrateForVac", clear
su UE [w=pop]
global lambdaUtimesSurvival = r(mean)

global RelSearchEmployed = $lambdaEtimesSurvival / $lambdaUtimesSurvival
di $RelSearchEmployed // 0.92

}

* Merge with vacancies
global RelSearch = $RelSearchEmployed
global NameRelSearch High
{

use "Acemo0307Ze", clear

rename ze City

merge 1:1 City using "temp/FranceUrateForVac", nogen keep(3)

gen Out = 1 - labforce

* two-state model
gen JobSeek1   = U 	/ ( 1 - U )	
	* unemployed only
gen JobSeek2   = ( U + $RelSearch * ( 1 - U )	) / ( 1 - U )	
	* unemployed and employed
gen JobSeek3   = ( U + 0.3 * ( 1 - U )	) / ( 1 - U )	
	* unemployed and employed, alternative relative search intensity

* for alternative measures
gen AltJobSeek1   = U
	* unemployed only
gen AltJobSeek2   = U + $RelSearch * ( 1 - U )	
	* unemployed and employed
gen AltJobSeek3   = U + 0.3 * ( 1 - U )
	* unemployed and employed, alternative relative search intensity

save "temp/Acemo0307Ze_01", replace

}

}

** Aggregate by deciles of unemployment
{
	
use "temp/Acemo0307Ze_01", clear
global NQTheta = 10
sort U
gen rank = sum(emp)
replace rank = rank / rank[_N]

replace pop = emp / (1-U)

gen EUQ = .
forvalues i=1(1)$NQTheta {
	di `i'
	replace EUQ = `i' if rank > (`i'-1)/$NQTheta & rank <= `i'/$NQTheta 
}

foreach var in EU U Diff AltJobSeek1 AltJobSeek2 AltJobSeek3 {
	replace `var' = `var' * emp
}

gen n = 1

collapse (sum)  Vacancies VacPerEmp VacResPerEmp VacRes2PerEmp ///
			    JobSeek1 JobSeek2 JobSeek3 ///
				AltJobSeek1 AltJobSeek2 AltJobSeek3 ///
				EU U emp pop n Diff , by( EUQ )


foreach var in EU U Diff AltJobSeek1 AltJobSeek2 AltJobSeek3 {
	replace `var' = `var' / emp
}
* replace Diff = Diff / n

foreach i in 1 2 3 {
	
	* use measure of vacancies per workers aggregated by city
	* to account for possible non-representativeness
	* of ACEMO survey at city level
	* then adjust by employment size at the bin level in JobSeek`i'
	gen Theta`i' 		= VacPerEmp 	/ JobSeek`i'
	gen ThetaRes`i' 	= VacResPerEmp 	/ JobSeek`i'
	gen ThetaRes2`i' 	= VacRes2PerEmp / JobSeek`i'

	gen lTheta`i' 		= log(Theta`i')
	gen lThetaRes`i' 	= log(ThetaRes`i')
	gen lThetaRes2`i' 	= log(ThetaRes2`i')
	
	* direct measure
	gen AltTheta`i'     = Vacancies / ( pop * AltJobSeek`i' )
	gen lAltTheta`i' 	= log(AltTheta`i')
	
}

gen lVacPop = log(Vacancies/pop)

foreach i in 1 2 3 {
	foreach var in lTheta`i' lAltTheta`i' lThetaRes`i' lThetaRes2`i' {
		qui su `var'
		replace `var' = `var' - r(mean)
	}
}
qui su lVacPop
replace lVacPop = lVacPop - `r(mean)'

save "temp/Acemo0307Ze_02", replace


}

** Figures with bin-scatter
{

use "temp/Acemo0307Ze_02", replace

gen lUmU = log(U/(1-U))
qui su lUmU
replace lUmU = lUmU - `r(mean)'
replace U = 100 * U

gen lU = log(U)
qui su lU
replace lU = lU - `r(mean)'

* for beveridge curve

* Regressions
foreach i in 1 2 3 {
	
	reg lTheta`i' U

	global slope`i' : di %3.2f _b[ U ]
	global cons`i' : di %3.2f _b[ _cons ]
	global se`i' : di %3.2f _se[ U ]
	
	reg lTheta`i' lUmU

	global UmUslope`i' : di %3.2f _b[ lUmU ]
	global UmUcons`i' : di %3.2f _b[ _cons ]
	global UmUse`i' : di %3.2f _se[ lUmU ]
	
	reg lAltTheta`i' lUmU

	global AltUmUslope`i' : di %3.2f _b[ lUmU ]
	global AltUmUcons`i' : di %3.2f _b[ _cons ]
	global AltUmUse`i' : di %3.2f _se[ lUmU ]

}

reg lVacPop lUmU
global VacPopslope : di %3.2f _b[ lUmU ]

gen lVacPop2 = lTheta1 + log(U)
qui su lVacPop2
replace lVacPop2 = lVacPop2 - `r(mean)'

reg lVacPop2 lUmU
global VacPop2slope : di %3.2f _b[ lUmU ]

reg lVacPop2 lU
global VacPop3slope : di %3.2f _b[ lU ]


* Graph: tightness and U-rate
local name Tightness072421
scatter lTheta1 U , msymbol(circle_hollow) mcolor("$MyBlue") msize(3)  || ///
scatter lTheta2 U , msymbol(O) mcolor("$MyOrange") msize(3) || ///
function y = $cons1 + $slope1 * x , range(6 14) clwidth(0.3) lcolor("$MyBlue") clpattern(dash) || ///
function y = $cons2 + $slope2 * x , range(6 14) clwidth(0.5) lcolor("$MyOrange") ///
	 xsize(10) ysize(7) ///
	 xlabel(,labsize(small) nogrid ) ylabel(,labsize(small) nogrid ) ///
	 xscale(range(6 14) titlegap(3) ) yscale(range(-0.3 0.3) titlegap(3) ) ///
	 xlabel(6(2)14) ylabel(-0.3(0.15)0.3) ///
	 xtitle("Unemployment rate (p.p.)",size(small)) ytitle("Log tightness",size(small)) ///
	 legend(on cols(1) order(1 2) size(small) 	///
			   label(1 "No job-to-job correction             (slope = $slope1 ($se1))") ///
			   label(2 "Estimated job-to-job correction (slope =  $slope2 ($se2))") ///
			   region(lcolor(white)) ) ///
	 graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	 name(`name', replace) 
graph export "${graphs}/`name'.pdf", name(`name') replace

* Graph: tightness and U-E ratio
local name Tightness082421
scatter lTheta1 lUmU , msymbol(circle_hollow) mcolor("$MyBlue") msize(3)  || ///
scatter lTheta2 lUmU , msymbol(O) mcolor("$MyOrange") msize(3) || ///
function y = $UmUcons1 + $UmUslope1 * x , range(-0.5 0.5) clwidth(0.3) lcolor("$MyBlue") clpattern(dash) || ///
function y = $UmUcons2 + $UmUslope2 * x , range(-0.5 0.5) clwidth(0.5) lcolor("$MyOrange") ///
	 xsize(10) ysize(10) /// 
	 xlabel(,labsize(small) nogrid ) ylabel(,labsize(small) nogrid ) ///
	 xscale(range(-0.5 0.5) titlegap(3) ) yscale(range(-0.5 0.5) titlegap(3) ) ///
	 xlabel(-0.5(0.25)0.5) ylabel(-0.5(0.25)0.5) ///
	 xtitle("Log unemployment-to-employment ratio",size(small)) ytitle("Log tightness",size(small)) ///
	 legend(on cols(2) order(1 2) size(small) 	///
			   label(1 "No job-to-job correction") ///
			   /// label(2 "Ad-hoc job-to-job correction      (slope =  $slope3 ($se3))") ///
			   region(lcolor(white)) ) ///
	 graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	 name(`name', replace) 
graph export "${graphs}/`name'.pdf", name(`name') replace
* slopes are -.5121205 and .4677052, respectively

}

** Regression tables
{
	
use "temp/Acemo0307Ze_02", clear

replace U = 100 * U
replace EU = 100 * EU

drop pop
gen pop = emp * ( 1 + U )

local wt emp
* Regressions
foreach i in 1 2 3 {
	
	reg lTheta`i' U [w=`wt']
	estimates store lTheta`i'U
	
	reg lThetaRes`i' U [w=`wt']
	estimates store lThetaRes`i'U
	
	reg lTheta`i' EU [w=`wt']
	estimates store lTheta`i'EU
	
	reg lThetaRes`i' EU [w=`wt']
	estimates store lThetaRes`i'EU

}

* Export tables				 
esttab lTheta1U lThetaRes1U lTheta3U lThetaRes3U lTheta2U lThetaRes2U using "${tables}/TightnessU072421.tex", replace ///
	booktabs b(3) se(3) alignment(l l) nocons ///
	mgroups("No JtJ corr." "Ad hoc JtJ corr." "Est. JtJ corr.", pattern(1 0 1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	mtitles("Raw" "Res." "Raw" "Res." "Raw" "Res.") ///
	title(Log tightness onto unemployment rate. City group level.) ///
	coeflabel(U "U-rate") ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 , fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis. $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$." ///
			 "\tiny Regressions population-weighted. First two columns: tightness = V/U. Third and fourth columns:" ///
			 "\tiny tightness = V/S, where S includes a 0.3 relative search intensity of employed workers." ///
			 "\tiny Fifth and sixth columns: tightness = V/S, where S includes the estimated (0.92) relative" ///
			 "\tiny search intensity of employed workers. Raw = raw vacancies. Res. = using vacancies residualized" ///
			 "\tiny from industry fixed effects.")
			 
			 
* Export tables				 
esttab lTheta1EU lThetaRes1EU lTheta3EU lThetaRes3EU lTheta2EU lThetaRes2EU using "${tables}/TightnessEU072421.tex", replace ///
	booktabs b(3) se(3) alignment(l l) nocons ///
	mgroups("No JtJ corr." "Ad hoc JtJ corr." "Est. JtJ corr.", pattern(1 0 1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	mtitles("Raw" "Res." "Raw" "Res." "Raw" "Res.") ///
	title(Log tightness onto EU rate. City group level.) ///
	coeflabel(EU "EU-rate") ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 , fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis. $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$." ///
			 "\tiny Regressions population-weighted. First two columns: tightness = V/U. Third and fourth columns:" ///
			 "\tiny tightness = V/S, where S includes a 0.3 relative search intensity of employed workers." ///
			 "\tiny Fifth and sixth columns: tightness = V/S, where S includes the estimated (0.92) relative" ///
			 "\tiny search intensity of employed workers. Raw = raw vacancies. Res. = using vacancies residualized" ///
			 "\tiny from industry fixed effects.")
					 

	
}

}

*** Productivity and EU by ZE and by firm
{

* Regressions by separation rate
{

global dataset Ficus9706
use "$dataset", clear

if "$dataset"=="Ficus9706" {
	global va va 
	global emp emp
}
if "$dataset"=="MultiplePlantVA0207" {
	global va vat
	global emp empt
}

keep if $va > 0 & sales > 0
drop if $va == .
keep if $emp > 5

drop if ze == 1101

rename Entry Entrant
rename EUZePanel EU

replace EU = 100 * EU

gen Entrant_EU      = Entrant * EU
gen Entrant_Skill   = Entrant * Skill
gen Incumbent_EU    = ( 1 - Entrant ) * EU
gen Incumbent_Skill = ( 1 - Entrant ) * Skill

* Regressions
reghdfe lvapw Incumbent_EU Entrant_EU Entrant, a(gape2 year) cluster(gape2#ze)
estimates store prod1

reghdfe lvapw Incumbent_EU Entrant_EU Entrant Incumbent_Skill Entrant_Skill, a(gape2 year)  cluster(gape2#ze)
estimates store prod2

reghdfe lva Incumbent_EU Entrant_EU Entrant Incumbent_Skill Entrant_Skill, a(gape2 year)  cluster(gape2#ze)
estimates store prod3

su lvapw [w=emp], d
global p50 = r(p50)

reghdfe dlvapw Incumbent_EU if lvapw > $p50 , a(gape2 year) cluster(gape2#year)
estimates store prod4

reghdfe dlvapw Incumbent_EU Incumbent_Skill  if lvapw > $p50 , a(gape2 year)  cluster(gape2#year)
estimates store prod5

* Table	for separation rate
local Results prod1 prod2 prod3 prod4 prod5
local RefCat EU 	"\textit{Geography}"
local OrderVar Incumbent_EU Entrant_EU
local rhslabperc Incumbent_EU 					"\hskip3mm Incumbent $\times$ Separation rate" ///			
				 Entrant_EU 					"\hskip3mm Entrant $\times$ Separation rate" ///
				 Incumbent_Skill 				"\hskip3mm Skill mix"
estfe `Results' , ///
	labels(year 		"\hskip3mm Year" ///
		   gape2		"\hskip3mm 2-digit industry")		  

di `r(indicate_fe)'

* Export tables				 
esttab `Results' using "${tables}/SinglePlantReg.tex", replace ///
	booktabs b(3) se(3) alignment(l l) ///
	mgroups("Level" "Growth rate", pattern(1 0 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	mtitles("VA/N" "VA/N" "VA" "VA/N" "VA/N") ///
	title(Plant-level regressions.) ///
	refcat(`RefCat', nolabel) /// 
	order(`OrderVar' "\textit{Controls}") /// 
	coeflabel(`rhslabperc') ///
	indicate(`r(indicate_fe)' Incumbent_Skill, labels("\multicolumn{1}{c}{\checkmark}"  "")) ///
	drop(Entrant Entrant_Skill _cons) ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 r2_within, fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis, clustered by city and 2-digit industry." ///
			 "\tiny $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$. Annual frequency, 1997-2006. " ///
			 "\tiny Entrant defined as less than two year old. ")



}

* Productivity distributions
{
	
global dataset Ficus9706
use "$dataset", clear

if "$dataset"=="Ficus9706" {
	global va va 
	global emp emp
}
if "$dataset"=="MultiplePlantVA0207" {
	global va vat
	global emp empt
}

keep if $va > 0 & sales > 0
drop if $va == .
keep if $emp > 5
drop if ze == 1101

gen     EUq = 1 if EUrankUW <= 0.25
replace EUq = 2 if EUrankUW >  0.25 & EUrankUW <= 0.5
replace EUq = 3 if EUrankUW >  0.5  & EUrankUW <= 0.75
replace EUq = 4 if EUrankUW >  0.75

rename Entry Entrant
rename EUZePanel EU

reg l${va}pw i.EUq [w=$emp]
global MeanLvapwReg1 = _b[_cons]
global MeanLvapwReg4 = $MeanLvapwReg1 + _b[4.EUq]

di "Low JL mean prod = " $MeanLvapwReg1 " ; High JL mean prod = " $MeanLvapwReg4
di "Low-high JL prod gap = " $MeanLvapwReg1 - $MeanLvapwReg4

reg l${va}pw i.EUq i.EUq#Entrant [w=$emp]

global Np = 100

foreach Var in All Entrants {

preserve

local suff
if "`Var'" == "Entrants"{
keep if Entrant == 1
local suff Entrants
}

sort EUq ${va}pw
bys EUq: gen CumVapw = sum( $emp )
bys EUq: gen RankVapw = CumVapw / CumVapw[_N]
replace RankVapw = floor( RankVapw * $Np ) / $Np

egen lMeanVapw = sum( l${va}pw * $emp ), by(RankVapw EUq)
egen SumVapw = sum( $emp ), by(RankVapw EUq)
replace lMeanVapw = lMeanVapw / SumVapw


gen InvRankVapw = 1 - RankVapw
foreach var in RankVapw InvRankVapw {
replace `var' = max( min( `var' , 1 - 1/$Np ) , 1/$Np )
}

foreach var in RankVapw InvRankVapw {
gen l`var' = log(`var') 
}

gen n = 1
gcollapse (mean) lInvRankVapw lMeanVapw EU (sum) n [w=$emp], by(EUq RankVapw)	   
rename n $emp

if "`Var'" == "Entrants"{
foreach vari in lInvRankVapw lMeanVapw EU {
rename `vari' `vari'`suff'
}
}

save "temp/ProdDistrib`suff'", replace

restore

}


use "temp/ProdDistrib", clear
merge 1:1 EUq RankVapw using "temp/ProdDistribEntrants", nogen keep(1 3)

sort EUq RankVapw
keep if EUq != .

* First compute means
foreach var in Vapw VapwEntrants {
bys EUq: gen dlM`var' = lMean`var'[_n+1] - lMean`var' if _n < _N
egen Mean`var' = sum( ( 1 - RankVapw ) * dlM`var'  ), by(EUq)
bys EUq: replace Mean`var' = Mean`var' + lMean`var'[1]

global Mean`var'1 = Mean`var'[1]
global Mean`var'4 = Mean`var'[_N]
}

di "Low JL mean prod = " $MeanVapw1 " ; High JL mean prod = " $MeanVapw4
di "Low-high JL prod gap = " $MeanVapw1 - $MeanVapw4
di "Low-high JL prod gap for entrants = " $MeanVapwEntrants1 - $MeanVapwEntrants4

foreach i in 1 4 {
	
	qui su lMeanVapw if EUq == 1, d
	global lowerbound`i' = r(p90)
	global upperbound`i' = r(max)
	
	reg lInvRankVapw lMeanVapw if EUq == `i' & lMeanVapw >= ${lowerbound`i'} & lMeanVapw <= ${upperbound`i'}
	global slope`i' = _b[lMeanVapw]
}	
	
global RatioSlopes = floor( 100 * ( $slope4 - 1 ) / ( $slope1 - 1 ) ) / 100

foreach i in 1 4 {
su EU if EUq == `i'
local meanEU`i' = r(mean)
}
global RatioJL = floor( 100 * `meanEU4' / `meanEU1' ) / 100


*** For paper
global select RankVapw > 0.05 & RankVapw < 0.95
global selecttail RankVapw > 0.9

local name ProdDistrib
scatter RankVapw lMeanVapw  if EUq == 1 & $select , connect(l) clwidth(0.5) lcolor("$MyBlue") msymbol(O) msize(1)  mcolor("$MyBlue") || ///
scatter RankVapw lMeanVapw  if EUq == 4  & $select , connect(l) clwidth(0.5) lpattern(dash) lcolor("$MyOrange")  msymbol(S) msize(1) mcolor("$MyOrange") ///
     xsize(6) ysize(6) /// 
	 xline($MeanVapw4 , lcolor("$MyOrange") lwidth(0.5) lpattern(dash) ) ///
	 xline($MeanVapw1 , lcolor("$MyBlue") lwidth(0.5) ) ///
	 	 xlabel( ,labsize(small)) ylabel(,labsize(small) nogrid )  ///
	 xtitle(log labor productivity , size(small) ) ///
	 ytitle(rank , size(small) ) ///
	 xscale( titlegap(3) ) yscale( titlegap(4) ) ///
	 legend(on order(1 2) cols(1)  ///
			label(1 "Low job losing rate quartile") ///
			label(2 "High job losing rate quartile") ///
			region(lcolor(white)) size(small) ) ///
	 graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	 name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace
 
local name ProdTail
scatter lInvRankVapw lMeanVapw if EUq == 1 & $selecttail , connect(l) clwidth(0.5) lcolor("$MyBlue") msymbol(O) msize(1)   mcolor("$MyBlue") || ///
scatter lInvRankVapw lMeanVapw  if EUq == 4 & $selecttail , connect(l) clwidth(0.5) lpattern(dash) lcolor("$MyOrange")  msymbol(S) msize(1) mcolor("$MyOrange") ///
     xsize(6) ysize(6) /// 
	 xlabel( ,labsize(small)) ylabel(,labsize(small) nogrid)  ///
	 xtitle(log labor productivity , size(small) ) ///
	 ytitle(log ( 1 - rank ) , size(small) ) ///
	 xscale( titlegap(3) ) yscale( titlegap(4) ) ///
	 legend(on label(1 "") label(2 "") cols(1) region(lcolor(white)) ) /// order(- " " " ")
	 graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	 text(-2.5 6.5  "Slope ratio        = $RatioSlopes", place(e) size(small)  color("$MyGreen")) ///
	 text(-2.6 6.5  "Job losing ratio = $RatioJL", place(e) size(small) color("$MyGreen") ) ///
	 name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace  
  
}

** EU by productivity centile
{
		
use "FirmVA9607", clear 

keep if emp >= 5

destring siren, replace force
keep siren year va emp
keep if siren != .
save "temp/FirmsProdEU9607", replace
	
use "dads_6" if E == 1, clear
merge m:1 siren year using "temp/FirmsProdEU9607"
keep if _m == 3	
	
	
	
global Np = 100
gen Q = .

gen lvapw = log(va/emp)
keep if lvapw != .

gsort lvapw
gen rank = _n / _N
forvalues i=1(1)$Np {
replace Q = `i' if rank > ( `i' - 1 ) / $Np
}
drop rank

gcollapse (mean) lvapw  EU, by(Q)

save "temp/EUprodCentiles", replace
	
	
use "temp/EUprodCentiles", clear

gen lvapw2 = lvapw^2
gen lvapw3 = lvapw^3
replace EU = 100 * EU

global lower 2.5
global upper 5.5
gen select = lvapw > $lower & lvapw < $upper
	
reg EU lvapw lvapw2 if select // lvapw3 
global Cons = _b[_cons]
global Slop1 = _b[lvapw]
global Slop2 = _b[lvapw2]

local name EUprodFirm
scatter EU lvapw if select, mcolor("$MyBlue") msymbol(C) msize(1.5) || ///
function y = $Cons + $Slop1 * x + $Slop2 * x^2  , range(2.9 5.25) lwidth(1) lcolor("$MyOrange") ///
	xsize(4) ysize(4) ///
	xscale(range($lower $upper ) titlegap(3) ) yscale(range(1 5) titlegap(3) ) ///
	xlabel(3(1)5 , labsize(small) nogrid) ///
	ylabel(1(1)5 , labsize(small) nogrid) ///
	xtitle("log labor productivity (centiles)" , size(small) ) ///
	ytitle("EU probability (p.p., by centile)" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(1 2) cols(2) ///
			  label(1 "Data") ///
			  label(2 "Quadratic fit") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace

}

** EU by productivity and other firm-level variables
{

use "dads_6" if E == 1, clear
tostring siren, replace force
merge m:1 siren year using FirmVA9607, nogen keep(3)
merge m:1 apet2 using temp/ListeNaf2003Names, nogen keep(3)
save temp/dads_6_EUcorrelates, replace


use temp/dads_6_EUcorrelates, clear

keep if emp >= 10
keep if va > 0 & ebe > 0
replace Skill = Skill / 300
keep if immo_cor > 0

gen lvapw 	   = log(va/emp)
gen ls    	   = wages / va
gen inputshare = inputs / sales
gen lcap 	   = log(immo_cor)

foreach var in va emp ebe sales inputs {
	gen l`var'   = log(`var')
}

rename lq lw

gegen MeanFirmEU = mean(EU), by(siren)

* rank firms based on unconditional EU with special group for EU = 0
preserve
keep siren MeanFirmEU emp
drop if MeanFirmEU == 0
gsort siren
bys siren: keep if _n == 1

xtile FirmGroup = MeanFirmEU [fw = emp ], nq(9)
replace FirmGroup = FirmGroup + 1
keep siren FirmGroup

save temp/FirmEUgroup, replace
restore


merge m:1 siren using temp/FirmEUgroup, nogen keep(1 3)
replace FirmGroup = 1 if MeanFirmEU == 0

reghdfe EU, a( FirmFE = FirmGroup WorkerFE = gid )

replace tradable = . if ~inlist(tradable,0,1)

gcollapse (mean) tradable EU Skill lw lvapw lva lemp lebe lsales ls inputshare lcap FirmFE WorkerFE FirmGroup emp, by(siren)


save temp/dads_6_EUcorrelates_02, replace


use temp/dads_6_EUcorrelates_02, clear

replace emp = floor(emp)

gcollapse (mean) tradable EU Skill lw lvapw lva lemp lebe lsales ls inputshare lcap FirmFE WorkerFE [fw = emp] , by(FirmGroup)

egen FirmFE0 = min( FirmFE * ( _n == 2 ) )
replace FirmFE = FirmFE - FirmFE0

gen lebepw = lebe - lemp
gen lcappw = lcap - lemp

global vlist EU lvapw tradable lebepw lcappw ls Skill lw lemp lsales inputshare WorkerFE

foreach var in $vlist {
	
if "`var'" == "EU" {
	local ytitle Job losing rate
}
if "`var'" == "lvapw" {
	local ytitle Log value added per worker
}
if "`var'" == "tradable" {
	local ytitle Fraction of jobs in tradable industries
}
if "`var'" == "lebepw" {
	local ytitle Log profits per worker
}
if "`var'" == "lcappw" {
	local ytitle Log capital per worker
}
if "`var'" == "ls" {
	local ytitle Labor share
}
if "`var'" == "Skill" {
	local ytitle Skill (normalized)
}
if "`var'" == "lw" {
	local ytitle Log wage
}
if "`var'" == "lemp" {
	local ytitle Log employment
}
if "`var'" == "lsales" {
	local ytitle Log sales
}
if "`var'" == "inputshare" {
	local ytitle Intermediate share
}
if "`var'" == "WorkerFE" {
	local ytitle Worker fixed effect
}

local fontsize medium
scatter `var' FirmFE if FirmGroup > 1 , ///
   	connect(l) msymbol(O) mcolor("$MyBlue") mlcolor("$MyBlue") msize(3) ///
   	clwidth(1) lcolor("$MyBlue") clpattern(solid) || ///
	, xsize(8) ysize(6) ///
	xscale(range(0 0.15) titlegap(3) ) yscale( titlegap(3) ) ///
	xlabel(0(0.05)0.15 , labsize(`fontsize') nogrid) ///
	ylabel(			   , labsize(`fontsize') nogrid) ///
	xtitle("Firm-specific job losing rate (firm fixed effect)" , size(`fontsize') ) ///
	ytitle("`ytitle'" , size(`fontsize') ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(off order(2 1) cols(2) symysize(5) ///
		  label(1 "Worker effects (`ShareWorker'%)") ///
		  label(2 "Employer effects (`ShareJob'%)") ///
		  region(lcolor(white)) size(small) ) ///
	name(`var'FirmFE, replace)
graph export "${graphs}/`var'FirmFE.pdf", name(`name') replace
	
}

	
}


}

*** Exit rates
{

* Use exit from Postes
use "PlantsPostes0207", clear

sort siret year
egen maxyear = max(year), by(siret)
gen Exit = ( year == maxyear ) & ( maxyear < 2007 )
drop if year == 2007

destring siren, replace force
destring nic, replace force

drop siret
gen siret = 1e4 * siren + nic
drop if siret == .

keep siret year Exit

gsort siret year
bys siret year: keep if _n == 1
save "temp/PlantExit0206", replace


use "dads_6" if E == 1 & year >= 2002 & year <= 2006, clear
merge m:1 siret year using "temp/PlantExit0206"
gen EU_Exit = EU * Exit

gcollapse (mean) EU EU_Exit, by(ze)

scatter EU_Exit EU if EU < 0.08

gen EU_Inc = EU - EU_Exit

local fontsize small	
local name EUexit
scatter EU_Exit EU if EU < 0.06, msymbol(circle_hollow) mlcolor("$MyGreen") msize(1) || ///
scatter EU_Inc EU if EU < 0.06, msymbol(O) mcolor("$MyBlue") msize(1) || ///
function y = x , range(0 0.06) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(14) ysize(14) ///
	xscale(range(0 0.06) titlegap(3) ) yscale(range(0 0.06) titlegap(3) ) ///
    xlabel(0(0.02)0.06 , labsize(`fontsize') nogrid) ///
    ylabel(0(0.02)0.06 , labsize(`fontsize') nogrid) ///
	xtitle("Job losing rate" , size(`fontsize') ) ///
	ytitle("Job loss" , size(`fontsize') ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(1 2) cols(2) size(small) ///
		   label(1 "Exiting establishments") ///
		   label(2 "Surviving establishments") ///
		   region(lcolor(white)) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace

	
}

*** Correlation between EU, UE wages and size
{


use "dads_6" if year >= 1997 , clear

keep if p25 == 1 // start with constant size sample

rename lq lw
rename ze City
gen w = exp(lw)

gen pop = 1

* city-wide variables
egen CityW = mean(w) , by(City year)
egen CitylW = mean(lw) , by(City year)
egen CityPop = sum(pop) , by(City year)

* standardize
foreach var in CityW CityPop {

gen l`var' = log(`var')
gen l`var'N = .

forvalues t=1997(1)2007 {
qui su l`var' if year == `t'
replace l`var'N = ( l`var' - r(mean) ) / r(sd) if year == `t'
}

}

gen CitylWN = .
forvalues t=1997(1)2007 {
qui su CitylW if year == `t'
replace CitylWN = ( CitylW - r(mean) ) / r(sd) if year == `t'
}

qui su Skill
gen SkillN = ( Skill- r(mean) ) / r(sd)

* put in 100 pp
foreach var in U EU UE {
replace `var' = 100 * `var'
su `var'
gen `var'norm = `var' / r(mean)
}

global clustervar City gapet3#year

* only consider movers
keep if mover == 1

* regressions
foreach var in U EU UE {

reghdfe `var' lCityWN lCityPopN , a(year) vce(cluster $clustervar )
estimates store `var'1

reghdfe `var' lCityWN lCityPopN SkillN , a(gapet3#year) vce(cluster $clustervar )
estimates store `var'2

reghdfe `var' lCityWN lCityPopN , a(gid gapet3#year) vce(cluster $clustervar )
estimates store `var'3

reghdfe `var' CitylWN lCityPopN , a(year) vce(cluster $clustervar )
estimates store `var'l1

reghdfe `var' CitylWN lCityPopN SkillN , a(gapet3#year) vce(cluster $clustervar )
estimates store `var'l2

reghdfe `var' CitylWN lCityPopN , a(gid gapet3#year) vce(cluster $clustervar )
estimates store `var'l3

}


* Table with log mean wage
			
local Results U1 U2 U3 EU1 EU2 EU3 UE1 UE2 UE3
			 
local OrderVar lCityWN lCityPopN SkillN

local rhslabperc lCityWN 			"\hskip3mm Log city wage" ///
				 lCityPopN 			"\hskip3mm Log city pop." ///
				 SkillN				"\hskip3mm Worker skill" 
				 
estfe `Results' , ///
	labels(year 			"\hskip3mm Year" ///
		   gapet3#year		"\hskip3mm Industry-Year" ///
		   gid				"\hskip3mm Worker")		  

di `r(indicate_fe)'

* Export tables				 
esttab `Results' using "${tables}/OLSUEUUElogmean.tex", replace ///
	booktabs b(2) se(2) alignment(l l) nomtitles ///
	mgroups("Unemployment" "Job loss" "Job finding", pattern(1 0 0 1 0 0 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	title(OLS regressions of worker-level unemployment, job loss and job finding probabilities \label{table:OLSUEUUE}) ///
	order(`OrderVar' "\textit{Fixed Effects}") /// 
	coeflabel(`rhslabperc') drop( _cons ) ///
	indicate( `r(indicate_fe)' , labels("\multicolumn{1}{c}{\checkmark}"  "")) ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 r2_within, fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis, two-way clustered by city and 3-digit industry. $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$." ///
			 "\tiny City population by km2 (density). Quarterly frequency, 1997-2007. Movers only." ///
			 "\tiny Left-hand-side variables in 100 percentage points. Right-hand-side variables standardized." ///
			 "\tiny Wages = log mean wage.")

		

* Table with mean log wage
			
local Results Ul1 Ul2 Ul3 EUl1 EUl2 EUl3 UEl1 UEl2 UEl3
			 
local OrderVar CitylWN lCityPopN SkillN

local rhslabperc CitylWN 			"\hskip3mm Log city wage" ///
				 lCityPopN 			"\hskip3mm Log city pop." ///
				 SkillN				"\hskip3mm Worker skill" 
				 
estfe `Results' , ///
	labels(year 			"\hskip3mm Year" ///
		   gapet3#year		"\hskip3mm Industry-Year" ///
		   gid				"\hskip3mm Worker")		  

di `r(indicate_fe)'

* Export tables				 
esttab `Results' using "${tables}/OLSUEUUEmeanlog.tex", replace ///
	booktabs b(2) se(2) alignment(l l) nomtitles nocons ///
	mgroups("Unemployment" "Job loss" "Job finding", pattern(1 0 0 1 0 0 1 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	title(OLS regressions of worker-level unemployment, job loss and job finding probabilities \label{table:OLSUEUUE}) ///
	order(`OrderVar' "\textit{Fixed Effects}") /// 
	coeflabel(`rhslabperc') drop( _cons ) ///
	indicate( `r(indicate_fe)' , labels("\multicolumn{1}{c}{\checkmark}"  "")) ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 r2_within, fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis, two-way clustered by city and 3-digit industry. $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$." ///
			 "\tiny City population by km2 (density). Quarterly frequency, 1997-2007. Movers only." ///
			 "\tiny Left-hand-side variables in 100 percentage points. Right-hand-side variables standardized." ///
			 "\tiny Wages = mean log wage.")


			 
			 
			 
			 
** Regressions with normalized variables

* regressions: save to disk to load in over-id part
foreach var in Unorm EUnorm UEnorm {

reghdfe `var' lCityWN lCityPopN , a(year) vce(cluster $clustervar )
estimates store `var'1
estimates save "temp/`var'1", replace

reghdfe `var' lCityWN lCityPopN SkillN , a(gapet3#year) vce(cluster $clustervar )
estimates store `var'2
estimates save "temp/`var'2", replace

reghdfe `var' lCityWN lCityPopN , a(gid gapet3#year) vce(cluster $clustervar )
estimates store `var'3
estimates save "temp/`var'3", replace

reghdfe `var' CitylWN lCityPopN , a(year) vce(cluster $clustervar )
estimates store `var'l1
estimates save "temp/`var'l1", replace

reghdfe `var' CitylWN lCityPopN SkillN , a(gapet3#year) vce(cluster $clustervar )
estimates store `var'l2
estimates save "temp/`var'l2", replace

reghdfe `var' CitylWN lCityPopN , a(gid gapet3#year) vce(cluster $clustervar )
estimates store `var'l3
estimates save "temp/`var'l3", replace

}			 
			 
			 
			 
			 
			 
}

*** Basics for maps
{

cd "$root"

* Basic mapping between ze and polygons at Commune level
use "source/ZE2010", clear
destring CODE_ZE, replace force
rename CODE_ZE ze
drop if inlist(substr(CODE_DEPT,2,1),"A","B")
rename INSEE_COM com
keep ze com _ID
save "output/ZE2010com", replace


* Basic mapping between ze and polygons at ZE level
use "source/ZE2011", clear
destring CODE_ZE, replace force
rename CODE_ZE ze
save "output/ZE2011", replace

}

*** Mechanical correlates of job loss
{
	
* Seasonality
{

use gid year q U EU UE ze gapet3 Skill ///
	using "dads_6" if year >= 1997, clear
keep if U != .	

gen pop = 1
gcollapse (mean) U EU (sum) pop, by(ze q)
scatter EUraw ze if q == 1 & EUraw < 0.2 || ///
scatter EUraw ze if q == 4 & EUraw < 0.2

save "temp/Seasonality_02", replace

* GET UNCONDITIONAL MEANS FROM EEC
use "EEC_03", clear
gen Temp = 1 if typejob == "Temporary"
replace Temp = 0 if typejob == "Regular"

collapse (mean) Temp unemp EU [w=wt], by(q)

egen SumEU = sum(EU)
gen ShareEU = EU / SumEU

tabstat Temp unemp ShareEU , stat(mean) by(q)

gen unemp2 = 2 * unemp
graph bar Temp unemp2 , over(q)

foreach var in Temp unemp {
qui su `var'
gen `var'Demean = `var' - r(mean)
}

preserve
keep q ShareEU
rename ShareEU ShareEU_EEC
save "temp/ShareEU_EEC", replace
restore


use "temp/Seasonality_02", clear
merge m:1 q using "temp/ShareEU_EEC", nogen keep(1 3)
sort U

global umin = 0.05
global umax = 0.2
keep if pop > 100
keep if U < $umax & U > $umin // 0.15

egen EUTot = sum( EU * pop ), by(ze)
gen ShareEU = pop * EU / EUTot

forvalues q=1(1)4{
qui su ShareEU  if q == `q' [w=pop]
replace ShareEU = ShareEU  - r(mean) + ShareEU_EEC if q == `q'
reg ShareEU U if q == `q' [w=pop]
global ConstantQ`q' = _b[_cons]
global SlopeQ`q' = _b[U]
}

keep if ShareEU < 0.5 

local name Seasonality
scatter ShareEU U if q == 1 , msymbol(circle_hollow) msize(1) mlcolor(navy%30) || ///
scatter ShareEU U if q == 2, msymbol(square_hollow) mlcolor(maroon%30) msize(1)|| ///
scatter ShareEU U if q == 3 , msymbol(triangle_hollow) mlcolor(forest_green%30) msize(1)|| ///
scatter ShareEU U if q == 4 , msymbol(diamond_hollow) mlcolor(orange%30) msize(1) || ///
function  y = $ConstantQ1 + $SlopeQ1 * x , range($umin $umax) lcolor(navy) lwidth(1) || ///
function  y = $ConstantQ2 + $SlopeQ2 * x , range($umin $umax) lcolor(maroon) lpattern(dash) lwidth(1) || ///
function  y = $ConstantQ3 + $SlopeQ3 * x , range($umin $umax) lcolor(forest_green) lpattern(shortdash) lwidth(1) || ///
function  y = $ConstantQ4 + $SlopeQ4 * x , range($umin $umax) lcolor(orange) lpattern(dash_dot) lwidth(1) ///
	xsize(7) ysize(4) ///
	xscale(range($umin $umax) titlegap(3) ) yscale(range(0 0.5) titlegap(3) ) ///
	xlabel($umin(0.025)$umax , labsize(small) nogrid) ///
	ylabel(0.0(0.1)0.5 , labsize(small) nogrid) ///
	xtitle("unemployment rate" , size(small) ) ///
	ytitle("fraction of job loss per quarter" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(on order(5 1 6 2 7 3 8 4) cols(2) position(3) ///
			  label(1 "   First quarter: Jan-Mar") ///
			  label(2 "   Second quarter: Apr-Jun") ///
			  label(3 "   Third quarter: Jul-Sep") ///
			  label(4 "   Fourth quarter: Oct-Dec") ///
			  label(5 "") ///
			  label(6 "") ///
			  label(7 "") ///
			  label(8 "") ///
		  region(lcolor(white)) size(small) ) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace

}

* Job creation vs Job destruction
{

* Re-compute measures as they seem to not line up well
use "temp/PlantsCZPostesVA0206", clear
sort siret
bys siret: keep if _n == 1
keep siret minyear maxyear Entrant Exiter
save "temp/SiretsJCJD", replace

clear
set obs 5
gen year = 2001 + _n
cross using "temp/SiretsJCJD"
save "temp/SiretsYearsJCJD", replace

use "temp/SiretsYearsJCJD", clear
merge 1:1 siret year using "temp/PlantsCZPostesVA0206", nogen keep(1 3)
save "PlantsCZPostesVA0206_JCJD_01", replace


use "PlantsCZPostesVA0206_JCJD_01", clear
replace emp = 0 if emp == .
drop JobCrea JobDes
sort siret year
bys siret: gen JobCrea     = max( emp - emp[_n-1] , 0 ) if year > 2002
bys siret: gen JobDes      = max( emp[_n-1] - emp , 0 ) if year > 2002

order siret year minyear maxyear emp JobCrea JobDes
sort siret year
keep if year >= minyear & year <= maxyear + 1
gcollapse (sum) emp JobCrea JobDes (mean) EUgroup , by(ze year)

* Added dataset here
use "Ficus9706", clear
rename EUZePanel EU

keep if emp >= 5
keep if Nyears > 1

drop JobCrea JobDes

sort siren year
bys siren: gen JobCrea     = max( emp - emp[_n-1] , 0 ) if year > 2002
bys siren: gen JobDes      = max( emp[_n-1] - emp , 0 ) if year > 2002

gcollapse (sum) emp JobCrea JobDes (mean) EU , by(ze year)

sort ze year
bys ze: gen JC = JobCrea / emp[_n-1] if _n > 1
bys ze: gen JD = JobDes  / emp[_n-1] if _n > 1
drop if year == 2002

gen JR = ( JC + JD ) / 2
save "PlantsCZPostesVA0206_JCJD_02", replace

use "PlantsCZPostesVA0206_JCJD_02", clear
fcollapse (mean) JC JD JR EU emp, by(ze)
rename emp pop
replace EU = EU

reg EU JR [w=pop]
global cJR = _b[_cons]
global sJR = floor( 100 * _b[JR] ) / 100
global seJR = floor( 1000 * _se[JR] ) / 1000

global select EU < 0.04 & JR < 0.2

local name EUJR
scatter EU JR if $select [w=pop], msymbol(circle_hollow) mlcolor("$MyBlue") ///
	|| function y = $cJR + $sJR * x , range(0.04 0.18) lcolor("$MyOrange") lwidth(0.5) ///
	, xsize(14) ysize(14) ///
	xscale(range(0.04 0.18) titlegap(3) ) yscale(range(0.01 0.04) titlegap(3) ) ///
	xlabel(0.04(0.02)0.18 , labsize(small) nogrid) ///
	ylabel(0.01(0.01)0.04 , labsize(small) nogrid) ///
	xtitle("job reallocation" , size(small) ) ///
	ytitle("job losing rate" , size(small) ) ///
	graphregion(color(white) margin(b+`bm')) bgcolor(white) ///
	legend(off order(1 2) ///
		  label(1 "Raw data") ///
		  label(2 "City fixed effect") ///
		  region(lcolor(white)) size(small) ) ///
	text(0.01 0.036  "Job losing rate = cst + $sJR ($seJR) * job reallocation. R2 = 0.23", place(e) size(small)  color("$MyOrange")) ///
	name(`name', replace)
graph export "${graphs}/`name'.pdf", name(`name') replace




}

* Temp contracts (use LFS)
{

global Nd = 5
global Options_WeightedRanks = 0
global Options_AllWorkers = 0

* Temp penalty and departement-wide U-rate in EEC
{
	
use "EEC_03", clear

keep if labforce == 1
rename unemp U
gen wtUE = wt if U == 1
gen wtEU = wt if U == 0

gen wtTypeJob = wt if inlist(typejob,"Temporary","Regular") == 1


gen     FractionTemp = 1 if typejob == "Temporary"
replace FractionTemp = 0 if typejob == "Regular"

reg EU Fraction [w=wt] // 0.0128 + 0.0166 * Fraction 
	// a temp worker has a 1.6pp larger chance of separating into U
	// which is more than double of the Regular job
global Fraction = _b[Fraction]
reghdfe EU Fraction age [w=wt], a(occ2) // 0.0160 * Fraction (similar when include log wage as control)
	
	
foreach var in U UE EU {
replace `var' = `var' * wt
}
rename wt wtU


replace FractionTemp = wtTypeJob * FractionTemp

collapse (sum) wtU wtUE wtEU wtTypeJob U UE EU FractionTemp , by(dep)

foreach var in U UE EU {
replace `var' = `var' / wt`var'
}

replace FractionTemp = FractionTemp / wtTypeJob

rename wtU DepPop
drop wtUE

sort U
if $Options_WeightedRanks == 1 {
	gen rank = sum(DepPop)
}
if $Options_WeightedRanks == 0 {
	gen rank = _n
}

replace rank = rank / rank[_N]

gen Q = .
forvalues i=1(1)$Nd {
replace Q = `i' if rank > (`i'-1)/$Nd
}
rename Q DepUbin
drop rank

save "temp/EEC_Dep_01", replace

use "temp/EEC_Dep_01", clear

foreach var in U UE EU Frac {
replace `var' = `var' * DepPop
}

collapse (sum) U UE EU Frac DepPop, by(DepUbin)

foreach var in U UE EU Frac {
replace `var' = `var' / DepPop
}

save "temp/EEC_Dep_011", replace

}

* Calculate Role of Temp contracts
{
	
use "temp/EEC_Dep_011", clear
keep if inlist(DepUbin,1,5)
gen RoleTemp = ( FractionTemp[2] - FractionTemp[1] ) * $Fraction / (EU[2] - EU[1] )

di RoleTemp[1] // .14143142


}


}

}

}
							

	
							******************
							*** ESTIMATION ***
							******************
							
							
							
{
	
	
*** quarterly migration probability for unemployed
{

use "dads_6" if year >= 1997, clear

gsort gid year q
gen mig = 0
bys gid: replace mig = ze != ze[_n-1] if _n > 1

gen migToU = 0
bys gid: replace migToU = ( ze != ze[_n-1] ) * ( U[_n-1] == 1 ) if _n > 1
	* count all changes of ze (that I only see when employed)
	* with a unemployment spell in the middle
	* as a migration into unemp

* Estimate mu
su migToU
global MigQ = r(mean)

clear
set obs 1
gen MigQ = $MigQ if _n == 1
export delimited using "$matlab/QuarterlyMigProba.csv", novarnames replace

}
	
*** Labor force exit rate
{

use "dads_6" if year >= 1997 , clear

gsort gid -year -q
bys gid: gen IsInLabInFuture = sum( labforce )

gsort gid year q
gen Exit = ( IsIn[_n+1] == 0 ) * ( max(EN,UN) == 1 )

* Estimate mu
su Exit
global Exit = r(mean)

clear
set obs 1
gen Exit = $Exit if _n == 1
export delimited using "$matlab/LFExitRate.csv", novarnames replace


}

*** EU, UE, wage and population in one dataset
{

use year EU UE lqgwc ze U using "dads_6" if year >= 1997, clear
rename ze City
gen wt = 1
gen W = exp(lqgwc)

fcollapse (mean) EU UE W (sum) wt (mean) U, by(City)

keep if wt > 1000 & EU > 0 & UE > 0 & EU != . & UE != . & City != .
count // 397

sort City
order City EU UE W wt U

save "temp/LaborMarketBasics", replace
export delimited using "$matlab/LaborMarketBasics.csv", novarnames replace

keep City
save "temp/CityList", replace

}

*** separation by tenure
{

use year ze EU EUannual tenureY using "temp/TenureProfiles" if year >= 1997, clear
gen wt = 1
rename ze City

gcollapse (mean) EU = EUannual (sum) wt , by( tenureY City )

merge m:1 City using "temp/CityList", nogen keep(3)
distinct City // 301

*sort meanEU tenureY
order City tenure wt

egen meanEU = wtmean(EU) , weight(wt)
egen GroupMeanEU = wtmean(EU) , by(City) weight(wt)
egen GroupSize = sum(wt) , by( City )
rename wt GroupTenureSize

keep if tenureY == 1
rename meanEU UnconditionalGroupMeanEU
rename EU EU1

keep City UnconditionalGroupMeanEU EU1 GroupMeanEU GroupSize
order City UnconditionalGroupMeanEU EU1 GroupMeanEU GroupSize

sort City

export delimited using "$matlab/EUyear1.csv", novarnames replace


}

*** EU, UE, wage and population in two subperiods
{

use "dads_6" if year >= 1997 & p25 == 1, clear

gen yearbin = year >= 2002
rename ze City
gen pop = 1
gen W = exp(lqgwc)

fcollapse (mean) u = U EU UE W (sum) pop U , by(City yearbin)
reshape wide u U EU UE W pop , i( City ) j(year)
order City u0 U0 EU0 UE0 W0 pop0 u1 U1 EU1 UE1 W1 pop1
merge m:1 City using "temp/CityList", nogen keep(3)
distinct City // 301
sort City

export delimited using "$matlab/LaborMarketBasics2periods.csv", novarnames replace

}

*** Bartik employment IVs
{
	
use "dads_6" if year >= 1997 & E == 1 & p25 == 1 , clear
	
gen yearbin = year >= 2002
gcollapse (sum) E , by(gapet2 yearbin)
rename E IndEmp
save "temp/IndustryEmp", replace

use "dads_6" if year >= 1997 & E == 1, clear
keep if year < 2002
gcollapse (sum) E , by(gapet2 ze)
rename ze City
egen SumE = sum( E ) , by(City)
gen ShareEmp = E / SumE
save "temp/InitialIndShares", replace


use ze gapet2 year using "dads_6" if year >= 1997 , clear
gen yearbin = year >= 2002

sort ze gapet2 yearbin

bys ze gapet2 yearbin : keep if _n == 1
rename ze City

merge m:1 City gapet2 using "temp/InitialIndShares", nogen keep(1 3)
merge m:1 yearbin gapet2 using "temp/IndustryEmp", nogen keep(1 3)

gen PredEmp = ShareEmp * IndEmp
gcollapse (sum) PredEmp , by(City yearbin)

merge m:1 City using "temp/CityList", nogen keep(3)
distinct City if yearbin == 0 // 301
distinct City if yearbin == 1 // 301

reshape wide PredEmp, i(City) j(yearbin)
sort City

export delimited using "$matlab/IndEmpIV.csv", novarnames replace

}

*** migration shares
{
	
use "dads_6" if year >= 1997, clear

gsort gid year q
gen mig = 0
bys gid: replace mig = ze != ze[_n-1] if _n > 1

gen migToU = 0
bys gid: replace migToU = ( ze != ze[_n-1] ) * ( U[_n-1] == 1 ) if _n > 1

gen yearbin = year >= 2002

gcollapse (sum) mig migToU , by( ze yearbin )

rename ze City 
merge m:1 City using "temp/CityList", nogen keep(3)
distinct City if yearbin == 0 // 301
distinct City if yearbin == 1 // 301

foreach var in mig migToU {
	foreach i in 0 1 {
		egen Sum`var'`i' = sum( `var' * ( yearbin == `i' ) )
		replace `var' = `var' / Sum`var'`i' if yearbin == `i'
		drop Sum*
	}
}

reshape wide mig migToU, i(City) j(yearbin)
	
corr mig0 migToU0 // 0.992	
corr mig1 migToU1 // 0.993	

sort City
order City mig0 migToU0 mig1 migToU1

export delimited using "$matlab/MigShares.csv", novarnames replace

}
	
*** housing prices and land area
{

use "AmenitiesHousePrices2007ZE", clear
rename ze City
merge m:1 City using "temp/CityList", nogen keep(2 3)
distinct City // 297
sort City

keep City area housepriceNotaires

order City area housepriceNotaires
export delimited using "$matlab/HousePricesArea.csv", novarnames replace

	
}
		
*** acceptance data from LFS for zhat0
{

use "EEC_03", clear
destring HowFound, replace force
tab HowFound if unemp == 0 & HowFound < 10

preserve
gen SumANPE = ( HowFound == 4 & unemp == 0 ) * wt
gen SumAll  = ( HowFound < 10 & unemp == 0 ) * wt
fcollapse (sum) SumANPE SumAll
gen n = 1
gen FractionANPE = SumANPE / SumAll
keep n Fraction
save "temp/FractionJobsANPE", replace
su
restore


keep id year q labforce unemp UE *ANPE UnempDuration wt
sort id year q

* Construct unemployment spells for each individual
bys id: gen unempSpell = 1 if unemp == 1 & unemp[_n-1] != 1 & _n > 1
bys id: replace unempSpell = 1 if unemp == 1 & _n == 1

bys id: gen UnempSpell = sum(unempSpell)
order id year q unemp unempSpell UnempSpell
tab UnempSpell // 0 1 2 3 
drop unempSpell

* Keep only unemployment
keep if unemp == 1

* Clean unemployment duration by spell
sort id UnempSpell
bys id UnempSpell: gen DurationStart = UnempDuration if _n == 1

order id year q UnempSpell DurationStart

bys id UnempSpell: replace DurationStart = DurationStart[1] if _n > 1

sort id year q
bys id: gen UnempDurationMonths2 = DurationStart + ( _n - 1 ) * 3

order id year q UnempDuration*

* Drop individuals where both measures differ too much
egen MaxDev = max( abs( UnempDurationMonths - UnempDurationMonths2 ) ) , by(id)
su MaxDev, d
 
drop if MaxDev > 3

drop MaxDev UnempDurationMonths
rename UnempDurationMonths2 UnempDurationMonths


* Clean
keep if Registered == "1" & Check == "1"
keep if NumDaysSinceLastJobOffer != .
keep if NumDaysSinceLastContact != .

keep if NumDaysSinceLastJobOffer >= NumDaysSinceLastContact

drop Registered Check YearRegistered MonthRegistered

* down to 1k observations
gen QuartersSinceContact = NumDaysSinceLastContact / 90
gen QuartersSinceJob     = NumDaysSinceLastJobOffer / 90
replace UnempDuration     = UnempDurationMonths / 3

gcollapse (mean) UnempDuration QuartersSinceContact QuartersSinceJob [ w = wt ]
gen n = 1
save "temp/ContactJobDurations", replace

merge 1:1 n using "temp/FractionJobsANPE", nogen keep(3)
drop n
order Fraction UnempDuration QuartersSinceContact QuartersSinceJob
export delimited using "$matlab/ANPE.csv", novarnames replace

}
	
*** variance decomposition for over-id
{

use "temp/LaborMarketBasics", clear

gen lEU  =   log(EU)
gen lUE  =   log(UE)
gen mlUE = - log(UE)
gen pop = wt
gen lUc = log(U/(1-U))
gen lUcPredict = lEU - lUE
corr lEU lUE [ w = pop ]

* Compute time-aggregated flows
gen FpS = - log( 1 - UE - EU )
gen S   = FpS * EU / ( 1 - exp( - FpS ) )
gen F   = FpS * UE / ( 1 - exp( - FpS ) )

gen lS = log(S)
gen mlF = - log(F)
gen lUcPredictTA = lS + mlF

* Variance decomposition w/o participation

* Save as dataset
gen n = 1
gen weights = .

gen weighted = .
gen unweighted = .

foreach var in weighted unweighted {

if "`var'" == "weighted" {
replace weights = pop 
}

if "`var'" == "unweighted" {
replace weights = n
}

su U [w = weights ]
replace `var' = r(mean) if _n == 1 // aggregate U
replace `var' = r(sd) if _n == 2 // st.dev. U

su lUc [w = weights ]
replace `var' = r(sd) if _n == 3 // st.dev. log(U/(1-U))

corr lEU mlUE [ w = weights ] , cov
mat CovTA = r(C)

global tot = CovTA[1,1] + CovTA[2,2] + 2 * CovTA[1,2]
replace `var' = $tot if _n == 4    // var predicted log(U/(1-U))
replace `var' = CovTA[1,1] / $tot if _n == 5 // share sep
replace `var' = CovTA[2,2] / $tot if _n == 6 // share find
replace `var' = 2 * CovTA[1,2] / $tot if _n == 7 // share cov

}

keep weighted unweighted
keep if _n <= 7

export delimited using "$matlab/VarDecompWUW.csv", novarnames replace

}

*** labor share
{

use "FirmVA9607", clear	
keep if emp >= 1 & va > 0 & wages > 0
gen labcosts = wages + charsoc
gcollapse (sum) va wages labcosts inputs, by(year)
gen ls1 = wages / va
gen ls2 = labcosts / va
gen is = inputs / ( inputs + va )
su is ls1 ls2
	
}

*** wages by tenure to estimate delta
{
	
use gid year q tenure lq E using "dads_6" , clear

* construct spell indicator
gsort gid year q
bys gid: gen NewSpell = E == 1 & E[_n-1] == 0 & _n > 1
bys gid: replace NewSpell = 1 if E == 1 & _n == 1
bys gid: gen spell = sum(NewSpell)

order gid year q E NewSpell spell

bys gid: gen NumSpell = spell[_N]

* keep only employed individuals with at least two spells
keep if E == 1

* level wages
gen w = exp(lq)

* construct mean wages by year
gcollapse (mean) MeanW = w, by(year) merge

* rescaled wages
gen what = w / MeanW

* construct rescaled wages relative to start of spell
egen what0 = max( what * ( _n == 1 ) ), by(gid spell)
gen omega = what / what0

reg omega tenure // - 0.034 : at least it is negative!

save "temp/dads6wageProfile", replace



use "temp/dads6wageProfile", clear
keep if tenure >= 0 & tenure != . & tenure <= 24 // too few observations after

gcollapse (mean) omega (sum) SumOmega = omega, by(tenure)

gen domega = omega - omega[1]

gen dSumOmega = SumOmega / SumOmega[1] - 1

keep tenure domega dSum

export delimited using "$matlab/WageTenure.csv", novarnames replace

}

*** Estimate phi and unemployment scar
{

** Non-employment spells dataset
{

use "dads_6" if year >= 1997, clear

sort gid year q

gen Uspell = 0

order gid year q labforce E U EU Uspell
gen UU = U

* Use also through NE
replace EU = max(EU,EN)
replace UE = max(UE,NE)
replace UU = E != 1

bys gid: replace Uspell = 1 if EU == 1 // initialize
bys gid: replace Uspell = 1 if Uspell[_n-1] == 1 & _n > 1 & UU == 1 // fill in
bys gid: replace Uspell = 1 if Uspell[_n-1] == 1 & UE[_n-1] == 1 // finalize

drop EU UE UU

* index U spellss for each individual
sort gid year q
gen UspellCount = 0
bys gid: replace UspellCount = UspellCount[_n-1] + ( Uspell == 1 & Uspell[_n-1] == 0 ) if _n > 1

keep if Uspell == 1

sort gid UspellCount year q
bys gid UspellCount: gen SpellLength = _N - 1

bys gid UspellCount: gen keepit =  ( _n == 1 ) | ( _n == _N )
keep if keepit == 1

egen minE = min(E), by(gid)
keep if minE == 1
count // 420k

sort gid UspellCount year q

bys gid UspellCount: gen Dlq = exp(lq[_n+1]) / exp(lq) - 1   if mod(_n,2) == 1 & _n < _N
bys gid UspellCount: gen Hlq = 2 * ( exp(lq[_n+1]) - exp(lq) ) / ( exp(lq[_n+1]) + exp(lq) ) if mod(_n,2) == 1 & _n < _N
bys gid UspellCount: gen dlq = lq[_n+1] - lq   if mod(_n,2) == 1 & _n < _N

keep gid year q UspellCount SpellLength g* lq dlq Dlq Hlq Skill tenure age ze
bys gid UspellCount: keep if _n == 1

save "temp/NonEmpSpellsWageGrowth", replace

}

** General employment spells dataset
{

use "dads_6" if year >= 1997, clear
sort gid year q

* index E spellss for each individual
sort gid year q
gen EspellCount = 0
bys gid: replace EspellCount = 1 if E == 1 & _n == 1
bys gid: replace EspellCount = EspellCount[_n-1] + ( E == 1 & E[_n-1] == 0 ) if _n > 1

keep if EspellCount
keep if E == 1

sort gid EspellCount year q
bys gid EspellCount: gen SpellLength = _N
bys gid EspellCount: gen keepit =  ( _n == 1 ) | ( _n == _N )
keep if keepit == 1

egen maxE = max(E), by(gid)
keep if maxE == 1

count // 1.9m

sort gid EspellCount year q

bys gid EspellCount: gen Dlq = exp(lq[_n+1]) / exp(lq) - 1   if mod(_n,2) == 1 & _n < _N
bys gid EspellCount: gen Hlq = 2 * ( exp(lq[_n+1]) - exp(lq) ) / ( exp(lq[_n+1]) + exp(lq) ) if mod(_n,2) == 1 & _n < _N
bys gid EspellCount: gen dlq = lq[_n+1] - lq   if mod(_n,2) == 1 & _n < _N

keep gid year q EspellCount SpellLength g* lq dlq Dlq Hlq Skill tenure age ze
bys gid EspellCount: keep if _n == 1

save "temp/EmpSpellsWageGrowth", replace

}

** DiD with only unemployed workers
{

use "temp/EmpSpellsWageGrowth", clear
*use "temp/SpecEmpSpellsWageGrowth", clear
gen U = 0
append using "temp/NonEmpSpellsWageGrowth"
replace U = 1 if U == .

keep if tenure >= 8 & tenure != . // 56k unemployed...

qui su SpellLength
replace SpellLength = SpellLength - r(mean)

gen U_SpellLength = U * SpellLength

gen lqPost = lq + dlq
gen agebin = floor(age / 5 ) 

reghdfe lqPost U_SpellLength if U== 1	///
		, a(year) cluster(ze gapet2)
estimates store phi1

reghdfe lqPost U_SpellLength lqgwc if U== 1 ///
		, a(year) cluster(ze gapet2)
estimates store phi2

reghdfe lqPost U_SpellLength Skill lqgwc if U== 1  ///			
		, a(year) cluster(ze gapet2)
estimates store phi3

reghdfe lqPost U_SpellLength Skill lqgwc if U== 1  ///			
		, a(year gapet2 ze) cluster(ze gapet2)
estimates store phi4

reghdfe lqPost U_SpellLength lqgwc  if U== 1 ///			
		, a(year gapet2 ze gid) cluster(ze gapet2)
estimates store phi5


reghdfe lqPost U_SpellLength SpellLength U lqgwc  ///			
		, a(year gapet2 ze gid) cluster(ze gapet2)
estimates store phi6




		
local Results phi1 phi2 phi3 phi4 phi5 phi6
			 
local OrderVar U_SpellLength U SpellLength lqgwc Skill

local rhslabperc U_SpellLength 			"\hskip3mm Job loss $\times$ Duration" ///
				 U 						"\hskip3mm Job loss" ///
				 SpellLength			"\hskip3mm Duration" ///
				 lqgwc					"\hskip3mm Pre log wage" ///
				 Skill					"\hskip3mm Skill" 
				 
estfe `Results' , ///
	labels(year 			"\hskip3mm Year" ///
		   gapet2			"\hskip3mm 2-digit Industry" ///
		   ze				"\hskip3mm City" ///
		   gid 				"\hskip3mm Worker")		  

* Export tables				 
esttab `Results' using "$tables/UnempScar.tex", replace ///
	booktabs b(2) se(2) alignment(l l) nomtitles ///
	mgroups("Unemployed only" "DiD", pattern(1 0 0 0 0 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	title(Unemployment scar estimation. LHS = Post log wage) ///
	order(`OrderVar' "\textit{Fixed Effects}") /// 
	coeflabel(`rhslabperc') ///
	drop(_cons) ///
	indicate(`r(indicate_fe)' , labels("\multicolumn{1}{c}{\checkmark}"  "")) ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 r2_within, fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis, two-way clustered by city and 2-digit industry." ///
			 "\tiny $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$.")



}

}

*** Validate amenities
{

import delimited ${matlab2}/temp/Amenities081521.csv, clear
rename v1 ze
rename v2 la

merge 1:1 ze using "AmenitiesHousePrices2007ZE", nogen keep(3)
keep if temperature > 0

global Num NumBasic NumEducation NumHealth NumCommercial NumTourism

foreach var in $Num {
gen `var'PC = `var' / pop
gen `var'PA = `var' / area
}

global NumPC NumBasicPC NumEducationPC NumHealthPC NumCommercialPC NumTourismPC
global NumPA NumBasicPA NumEducationPA NumHealthPA NumCommercialPA NumTourismPA

gen educ = NumEducation / NumCommercial
gen health = NumHealth / NumCommercial

gen educ2 = ( 1 + NumEducation ) / ( 1 + NumCommercial )
gen health2 = ( 1 + NumHealth ) / ( 1 + NumCommercial )


foreach var in temperature rain sun educ health $Num $NumPC $NumPA {
qui su `var'
gen l`var' = log(`var')
replace `var' = ( `var' - r(mean) ) / r(sd)
}

gen lpop = log(pop)
gen a = exp(la)
qui su a
replace a = ( a - r(mean) ) / r(sd)


reg la lsun lNumBasicPA lNumEducationPA lNumHealthPA lNumCommercialPA, r
estimates store ra
			
local dire $tables
local date = subinstr("$S_DATE"," ","",.) 
local time = substr(subinstr("$S_TIME",":","",.),1,3) // hour and tens of minutes 
local date `date'Time`time'

local RefCat lsun 				"\textit{Weather}" ///
			 lNumBasicPA	"\textit{Services}"
			 
local OrderVar lsun lNumBasicPA lNumEducationPA lNumHealthPA lNumCommercialPA

local rhslabperc lsun 				"\hskip5mm Sun hours" ///
				 lNumBasicPA 		"\hskip5mm Basic public" ///
				 lNumEducationPA 	"\hskip5mm Education" ///
				 lNumHealthPA		"\hskip5mm Health" ///
				 lNumCommercialPA	"\hskip5mm Commercial" 
				 

* Export tables				 
esttab ra using "${tables}/Amenities.tex", replace ///
	booktabs b(3) se(3) alignment(c c) ///
	nomtitles nocons ///
	title(Correlation of estimated amenities with observables.) ///
	refcat(`RefCat', nolabel) /// 
	order(`OrderVar') /// 
	coeflabel(`rhslabperc') ///
	star(+ 0.10 * 0.05 ** 0.01) ///
	stats(N r2, fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Robust S.E. in parenthesis." ///
			 "\tiny $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01$." ///
			 "\tiny Log amenities on log sun hours per month" ///
			 "\tiny and log service establishments.")

}

*** Correlations at city level
{
	
* store vacancy regressions	
use "temp/Acemo0307Ze_02", replace

gen lUmU = log(U/(1-U))
qui su lUmU
replace lUmU = lUmU - `r(mean)'
replace U = 100 * U

gen lU = log(U)
qui su lU
replace lU = lU - `r(mean)'

* tightness regressions in data
foreach i in 1 2 {
	reg lTheta`i' lUmU
	estimates store lTheta`i'
}


* load model regressions
import delimited "${root2}/source/correlationsVac.csv", case(preserve) clear 

* flows regressions in model
foreach var in Unorm EUnorm UEnorm {
	reg `var' CitylWN lCityPopN [w=weights]
	estimates store `var'Model
}

* vacancy regression in model
reg lThetaBase lUmUBase [w=weights]
estimates store lThetaModelBase

reg lThetaGamma3 lUmUGamma3 [w=weightsGamma3]
estimates store lThetaModelGamma3

* load flows regression in data
foreach var in Unorm EUnorm UEnorm {
	foreach i in 1 2 3 {
		estimates use ${root2}/output/temp/`var'l`i'.ster
		estimates store `var'l`i'
	}
}


* produce table
local Results EUnorml3 EUnormModel UEnorml3 UEnormModel lTheta2 lTheta1 lThetaModelBase lThetaModelGamma3 
local OrderVar CitylWN lCityPopN lUmU lUmUBase lUmUGamma3
local rhslabperc CitylWN 			"Log city wage" ///
				 lCityPopN 			"Log city population" ///
				 lUmU 				"Log unemp.-emp. ratio" ///
				 lUmUBase 			"Log unemp.-emp. ratio" ///
				 lUmUGamma3 		"Log unemp.-emp. ratio"				 
estfe `Results' , ///
	labels(year 			"\hskip3mm Year" ///
		   gapet3#year		"\hskip3mm Industry-Year" ///
		   gid				"\hskip3mm Worker")		  

di `r(indicate_fe)'

* Export tables				 
esttab `Results' using "${tables}/OLSUEUUEmodelVac.tex", replace ///
	booktabs b(2) se(2) alignment(l l) nomtitles nocons ///
	mgroups("Job loss" "Job finding" "Labor market tightness", pattern(1 0 1 0 1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	title(OLS regressions of job loss and job finding probabilities and labor market tightness \label{table:OLSUEUUE}) ///
	order(`OrderVar' "\textit{Fixed Effects}") /// 
	coeflabel(`rhslabperc') drop( _cons ) ///
	indicate( `r(indicate_fe)' , labels("\multicolumn{1}{c}{\checkmark}"  "")) /// indicate(`FEs') /// , labels("\multicolumn{1}{c}{\checkmark}"  "")) ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	stats(N r2 r2_within, fmt(0 3) layout("\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}" "\multicolumn{1}{c}{@}") ///
		  labels(`"Obs."' `"\(R^{2}\)"' `"W.-\(R^{2}\)"'  ) ///
		  ) ///
	nonotes ///
	addnotes("\tiny Standard errors in parenthesis, two-way clustered by city and 3-digit industry. $^+ \ p<0.10, \ ^* \ p<0.05, \ ^{**}\ p<0.01, \ ^{***}\ p<0.001$." ///
			 "\tiny City population by km2 (density). Quarterly frequency, 1997-2007. Movers only." ///
			 "\tiny Left-hand-side variable relative to unconditional mean. Right-hand-side variables standardized." ///
			 "\tiny Wages = mean log wage.")

	
}

*** ZFU
{

import delimited ${root2}/source/ZFU/PopulationCommuneZFU.csv, delimiter(comma) case(preserve) encoding(utf8) clear 
drop Nom
rename PopulationmunicipaleenZFU PopMunZFU
rename PopulationtotaleenZFU PopTotZFU
rename Code com
tostring com, replace force

save "temp/ZFUpop", replace

import delimited ${root2}/source/ZFU/pop2006.csv, delimiter(comma) case(preserve) encoding(utf8) clear 
merge 1:1 com using "temp/ZFUpop"
keep if _m == 3
drop _m

gen FractionInZFU = PopMun / pop
su Fraction
keep com pop Fraction
save "temp/ZFUfracpop", replace


}

*** Maps of welfare gains
{

import delimited "$root2/source/WelfareGainsMap.csv", clear 

* Merge with map data	
merge 1:m ze using "ZE2011"
foreach var in welfare du pop {
qui su `var'
replace `var' = r(mean) if _m == 2
}

replace du       = floor( du  * 1000 ) / 1000
replace welfare  = floor( welfare  * 10 ) / 10
replace tfp  	 = floor( tfp  * 10 ) / 10

rename ze City

merge 1:1 City using "temp/FranceUrateDecomps", keepusing(U) nogen keep(1 3)
rename City ze

qui su U [w=pop]
replace U = r(mean) if U == .

gen U0 = 100 * U
gen U1 = 100 * ( U + du )

foreach var in U0 U1 {
replace `var' = floor( `var'  * 1000 ) / 1000
}

* produce welfare map
spmap welfare using "$root2/source/ContourZE2011" ///
	, id(_ID) ///
	clmethod(quantile) clnumber(6) ///
	ndfcolor(Black) fcolor(Blues) ///
	legend(on) legstyle(2) ///
	name(WelfareGains, replace)
graph export "${graphs}/WelfareGainsMap.pdf", name(WelfareGains) replace

replace tfp = 0 if tfp == . // to deal with corsica and the legend

* produce tfp map
spmap tfp using "$root2/source/ContourZE2011" ///
	, id(_ID) /// 
	clmethod(quantile) clnumber(6) ///
	ndfcolor(Black) fcolor(Blues) ///
	legend(on) legstyle(2) ndlabel(none) ///
	name(TfpGains, replace)
graph export "${graphs}/TfpGainsMap.pdf", name(TfpGains) replace

}

}






